diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2002-11-06 00:00:45 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-11-06 00:00:45 +0000 |
commit | f6dba10e623fa575c8446f854aa63c97d4fedea3 (patch) | |
tree | 2c5eb86c8e2961c90b524b49f25e3c25efa6eee2 /src/backend/optimizer | |
parent | a8c18b980e1e00fe08ac02562f81e3e64d4e9fd4 (diff) |
First phase of implementing hash-based grouping/aggregation. An AGG plan
node now does its own grouping of the input rows, and has no need for a
preceding GROUP node in the plan pipeline. This allows elimination of
the misnamed tuplePerGroup option for GROUP, and actually saves more code
in nodeGroup.c than it costs in nodeAgg.c, as well as being presumably
faster. Restructure the API of query_planner so that we do not commit to
using a sorted or unsorted plan in query_planner; instead grouping_planner
makes the decision. (Right now it isn't any smarter than query_planner
was, but that will change as soon as it has the option to select a hash-
based aggregation step.) Despite all the hackery, no initdb needed since
only in-memory node types changed.
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r-- | src/backend/optimizer/README | 21 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_misc.c | 140 | ||||
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 25 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 133 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planmain.c | 264 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 310 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 38 |
7 files changed, 371 insertions, 560 deletions
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 26f23168861..698b831a9cf 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -219,11 +219,9 @@ planner() pull out constant quals, which can be used to gate execution of the whole plan (if any are found, we make a top-level Result node to do the gating) - make a simplified target list that only contains Vars, no expressions ----subplanner() - make list of base relations used in query - split up the qual into restrictions (a=1) and joins (b=c) - find qual clauses that enable merge and hash joins + make list of base relations used in query + split up the qual into restrictions (a=1) and joins (b=c) + find qual clauses that enable merge and hash joins ----make_one_rel() set_base_rel_pathlist() find scan and all index paths for each base relation @@ -239,7 +237,7 @@ planner() cheapest path for each newly constructed joinrel. Loop back if this wasn't the top join level. Back at query_planner: - put back constant quals and non-simplified target list + put back any constant quals by adding a Result node Back at grouping_planner: do grouping(GROUP) do aggregates @@ -257,8 +255,11 @@ RelOptInfo - a relation or joined relations JoinInfo - join clauses, including the relids needed for the join Path - every way to generate a RelOptInfo(sequential,index,joins) - SeqScan - a plain Path node with nodeTag = T_SeqScan + SeqScan - a plain Path node with pathtype = T_SeqScan IndexPath - index scans + TidPath - scan by CTID + AppendPath - append multiple subpaths together + ResultPath - a Result plan (used for variable-free tlist or qual) NestPath - nested-loop joins MergePath - merge joins HashPath - hash joins @@ -276,10 +277,10 @@ generated during the optimization process are marked with their sort order It is also possible to avoid an explicit sort step to implement a user's ORDER BY clause if the final path has the right ordering already, so the -sort ordering is of interest even at the top level. subplanner() will +sort ordering is of interest even at the top level. query_planner() will look for the cheapest path with a sort order matching the desired order, -and will compare its cost to the cost of using the cheapest-overall path -and doing an explicit sort. +and grouping_planner() will compare its cost to the cost of using the +cheapest-overall path and doing an explicit sort. When we are generating paths for a particular RelOptInfo, we discard a path if it is more expensive than another known path that has the same or better diff --git a/src/backend/optimizer/geqo/geqo_misc.c b/src/backend/optimizer/geqo/geqo_misc.c index ef7b489f591..7cda46946bc 100644 --- a/src/backend/optimizer/geqo/geqo_misc.c +++ b/src/backend/optimizer/geqo/geqo_misc.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: geqo_misc.c,v 1.34 2002/09/04 20:31:20 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_misc.c,v 1.35 2002/11/06 00:00:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,19 +19,17 @@ =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= */ - - #include "postgres.h" #include "optimizer/geqo_misc.h" #include "nodes/print.h" + #ifdef GEQO_DEBUG -static float avg_pool(Pool *pool); -/* avg_pool - * +/* + * avg_pool */ static float avg_pool(Pool *pool) @@ -81,7 +79,6 @@ print_pool(FILE *fp, Pool *pool, int start, int stop) /* print_gen * * printout for chromosome: best, worst, mean, average - * */ void print_gen(FILE *fp, Pool *pool, int generation) @@ -121,133 +118,4 @@ print_edge_table(FILE *fp, Edge *edge_table, int num_gene) fprintf(fp, "\n"); } -/************************************************************* - Debug output subroutines - *************************************************************/ - -void -geqo_print_joinclauses(Query *root, List *clauses) -{ - List *l; - - foreach(l, clauses) - { - RestrictInfo *c = lfirst(l); - - print_expr((Node *) c->clause, root->rtable); - if (lnext(l)) - printf(" "); - } -} - -void -geqo_print_path(Query *root, Path *path, int indent) -{ - char *ptype = NULL; - JoinPath *jp; - bool join = false; - int i; - - for (i = 0; i < indent; i++) - printf("\t"); - - switch (nodeTag(path)) - { - case T_Path: - ptype = "SeqScan"; - join = false; - break; - case T_IndexPath: - ptype = "IdxScan"; - join = false; - break; - case T_NestPath: - ptype = "Nestloop"; - join = true; - break; - case T_MergePath: - ptype = "MergeJoin"; - join = true; - break; - case T_HashPath: - ptype = "HashJoin"; - join = true; - break; - default: - break; - } - if (join) - { - jp = (JoinPath *) path; - printf("%s rows=%.0f cost=%.2f..%.2f\n", - ptype, path->parent->rows, - path->startup_cost, path->total_cost); - switch (nodeTag(path)) - { - case T_MergePath: - case T_HashPath: - for (i = 0; i < indent + 1; i++) - printf("\t"); - printf(" clauses=("); - geqo_print_joinclauses(root, jp->joinrestrictinfo); - printf(")\n"); - - if (nodeTag(path) == T_MergePath) - { - MergePath *mp = (MergePath *) path; - - if (mp->outersortkeys || mp->innersortkeys) - { - for (i = 0; i < indent + 1; i++) - printf("\t"); - printf(" sortouter=%d sortinner=%d\n", - ((mp->outersortkeys) ? 1 : 0), - ((mp->innersortkeys) ? 1 : 0)); - } - } - break; - default: - break; - } - geqo_print_path(root, jp->outerjoinpath, indent + 1); - geqo_print_path(root, jp->innerjoinpath, indent + 1); - } - else - { - int relid = lfirsti(path->parent->relids); - - printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n", - ptype, relid, path->parent->rows, - path->startup_cost, path->total_cost); - - if (IsA(path, IndexPath)) - { - printf(" pathkeys="); - print_pathkeys(path->pathkeys, root->rtable); - } - } -} - -void -geqo_print_rel(Query *root, RelOptInfo *rel) -{ - List *l; - - printf("______________________________\n"); - printf("("); - foreach(l, rel->relids) - printf("%d ", lfirsti(l)); - printf("): rows=%.0f width=%d\n", rel->rows, rel->width); - - printf("\tpath list:\n"); - foreach(l, rel->pathlist) - geqo_print_path(root, lfirst(l), 1); - - printf("\n\tcheapest startup path:\n"); - geqo_print_path(root, rel->cheapest_startup_path, 1); - - printf("\n\tcheapest total path:\n"); - geqo_print_path(root, rel->cheapest_total_path, 1); -} - #endif /* GEQO_DEBUG */ diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 7d8d6a6beba..ea016e8a2e9 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.88 2002/09/04 20:31:20 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.89 2002/11/06 00:00:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -742,6 +742,14 @@ print_path(Query *root, Path *path, int indent) ptype = "TidScan"; join = false; break; + case T_AppendPath: + ptype = "Append"; + join = false; + break; + case T_ResultPath: + ptype = "Result"; + join = false; + break; case T_NestPath: ptype = "Nestloop"; join = true; @@ -762,10 +770,15 @@ print_path(Query *root, Path *path, int indent) for (i = 0; i < indent; i++) printf("\t"); - printf("%s(", ptype); - print_relids(path->parent->relids); - printf(") rows=%.0f cost=%.2f..%.2f\n", - path->parent->rows, path->startup_cost, path->total_cost); + printf("%s", ptype); + + if (path->parent) + { + printf("("); + print_relids(path->parent->relids); + printf(") rows=%.0f", path->parent->rows); + } + printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost); if (path->pathkeys) { @@ -785,7 +798,7 @@ print_path(Query *root, Path *path, int indent) print_restrictclauses(root, jp->joinrestrictinfo); printf("\n"); - if (nodeTag(path) == T_MergePath) + if (IsA(path, MergePath)) { MergePath *mp = (MergePath *) path; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9cdbcc2e5e5..5a2acbd2763 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.119 2002/09/18 21:35:21 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.120 2002/11/06 00:00:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -34,6 +34,7 @@ static Scan *create_scan_plan(Query *root, Path *best_path); static Join *create_join_plan(Query *root, JoinPath *best_path); static Append *create_append_plan(Query *root, AppendPath *best_path); +static Result *create_result_plan(Query *root, ResultPath *best_path); static SeqScan *create_seqscan_plan(Path *best_path, List *tlist, List *scan_clauses); static IndexScan *create_indexscan_plan(Query *root, IndexPath *best_path, @@ -135,6 +136,10 @@ create_plan(Query *root, Path *best_path) plan = (Plan *) create_append_plan(root, (AppendPath *) best_path); break; + case T_Result: + plan = (Plan *) create_result_plan(root, + (ResultPath *) best_path); + break; default: elog(ERROR, "create_plan: unknown pathtype %d", best_path->pathtype); @@ -342,6 +347,35 @@ create_append_plan(Query *root, AppendPath *best_path) return plan; } +/* + * create_result_plan + * Create a Result plan for 'best_path' and (recursively) plans + * for its subpaths. + * + * Returns a Plan node. + */ +static Result * +create_result_plan(Query *root, ResultPath *best_path) +{ + Result *plan; + List *tlist; + Plan *subplan; + + if (best_path->path.parent) + tlist = best_path->path.parent->targetlist; + else + tlist = NIL; /* will be filled in later */ + + if (best_path->subpath) + subplan = create_plan(root, best_path->subpath); + else + subplan = NULL; + + plan = make_result(tlist, (Node *) best_path->constantqual, subplan); + + return plan; +} + /***************************************************************************** * @@ -1605,11 +1639,16 @@ make_material(List *tlist, Plan *lefttree) } Agg * -make_agg(List *tlist, List *qual, Plan *lefttree) +make_agg(List *tlist, List *qual, AggStrategy aggstrategy, + int ngrp, AttrNumber *grpColIdx, Plan *lefttree) { Agg *node = makeNode(Agg); Plan *plan = &node->plan; + node->aggstrategy = aggstrategy; + node->numCols = ngrp; + node->grpColIdx = grpColIdx; + copy_plan_costsize(plan, lefttree); /* @@ -1621,22 +1660,21 @@ make_agg(List *tlist, List *qual, Plan *lefttree) length(pull_agg_clause((Node *) qual))); /* - * We will produce a single output tuple if the input is not a Group, + * We will produce a single output tuple if not grouping, * and a tuple per group otherwise. For now, estimate the number of * groups as 10% of the number of tuples --- bogus, but how to do - * better? (Note we assume the input Group node is in "tuplePerGroup" - * mode, so it didn't reduce its row count already.) + * better? */ - if (IsA(lefttree, Group)) + if (aggstrategy == AGG_PLAIN) { - plan->plan_rows *= 0.1; - if (plan->plan_rows < 1) - plan->plan_rows = 1; + plan->plan_rows = 1; + plan->startup_cost = plan->total_cost; } else { - plan->plan_rows = 1; - plan->startup_cost = plan->total_cost; + plan->plan_rows *= 0.1; + if (plan->plan_rows < 1) + plan->plan_rows = 1; } plan->state = (EState *) NULL; @@ -1650,7 +1688,6 @@ make_agg(List *tlist, List *qual, Plan *lefttree) Group * make_group(List *tlist, - bool tuplePerGroup, int ngrp, AttrNumber *grpColIdx, Plan *lefttree) @@ -1667,25 +1704,18 @@ make_group(List *tlist, plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp; /* - * If tuplePerGroup (which is named exactly backwards) is true, we - * will return all the input tuples, so the input node's row count is - * OK. Otherwise, we'll return only one tuple from each group. For - * now, estimate the number of groups as 10% of the number of tuples + * Estimate the number of groups as 10% of the number of tuples * --- bogus, but how to do better? */ - if (!tuplePerGroup) - { - plan->plan_rows *= 0.1; - if (plan->plan_rows < 1) - plan->plan_rows = 1; - } + plan->plan_rows *= 0.1; + if (plan->plan_rows < 1) + plan->plan_rows = 1; plan->state = (EState *) NULL; plan->qual = NULL; plan->targetlist = tlist; plan->lefttree = lefttree; plan->righttree = (Plan *) NULL; - node->tuplePerGroup = tuplePerGroup; node->numCols = ngrp; node->grpColIdx = grpColIdx; @@ -1883,9 +1913,6 @@ make_result(List *tlist, Result *node = makeNode(Result); Plan *plan = &node->plan; -#ifdef NOT_USED - tlist = generate_fjoin(tlist); -#endif if (subplan) copy_plan_costsize(plan, subplan); else @@ -1906,57 +1933,3 @@ make_result(List *tlist, return node; } - -#ifdef NOT_USED -List * -generate_fjoin(List *tlist) -{ - List tlistP; - List newTlist = NIL; - List fjoinList = NIL; - int nIters = 0; - - /* - * Break the target list into elements with Iter nodes, and those - * without them. - */ - foreach(tlistP, tlist) - { - List tlistElem; - - tlistElem = lfirst(tlistP); - if (IsA(lsecond(tlistElem), Iter)) - { - nIters++; - fjoinList = lappend(fjoinList, tlistElem); - } - else - newTlist = lappend(newTlist, tlistElem); - } - - /* - * if we have an Iter node then we need to flatten. - */ - if (nIters > 0) - { - List *inner; - List *tempList; - Fjoin *fjoinNode; - DatumPtr results = (DatumPtr) palloc(nIters * sizeof(Datum)); - BoolPtr alwaysDone = (BoolPtr) palloc(nIters * sizeof(bool)); - - inner = lfirst(fjoinList); - fjoinList = lnext(fjoinList); - fjoinNode = (Fjoin) MakeFjoin(false, - nIters, - inner, - results, - alwaysDone); - tempList = lcons(fjoinNode, fjoinList); - newTlist = lappend(newTlist, tempList); - } - return newTlist; - return tlist; /* do nothing for now - ay 10/94 */ -} - -#endif diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 41f914b8f91..4354a5eb035 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,15 +14,13 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.70 2002/09/02 02:47:02 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.71 2002/11/06 00:00:44 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" - #include "optimizer/clauses.h" -#include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" @@ -31,31 +29,37 @@ #include "utils/memutils.h" -static Plan *subplanner(Query *root, List *flat_tlist, - double tuple_fraction); - - /*-------------------- * query_planner - * Generate a plan for a basic query, which may involve joins but - * not any fancier features. + * Generate a path (that is, a simplified plan) for a basic query, + * which may involve joins but not any fancier features. * + * Since query_planner does not handle the toplevel processing (grouping, + * sorting, etc) it cannot select the best path by itself. It selects + * two paths: the cheapest path that produces the required tuples, independent + * of any ordering considerations, and the cheapest path that produces the + * required tuples in the required ordering, if there is a path that + * can produce them without an explicit top-level sort step. The caller + * (grouping_planner) will make the final decision about which to use. + * + * Input parameters: + * root is the query to plan * tlist is the target list the query should produce (NOT root->targetList!) * tuple_fraction is the fraction of tuples we expect will be retrieved * - * Note: the Query node now also includes a query_pathkeys field, which - * is both an input and an output of query_planner(). The input value - * signals query_planner that the indicated sort order is wanted in the - * final output plan. The output value is the actual pathkeys of the - * selected path. This might not be the same as what the caller requested; - * the caller must do pathkeys_contained_in() to decide whether an - * explicit sort is still needed. (The main reason query_pathkeys is a - * Query field and not a passed parameter is that the low-level routines - * in indxpath.c need to see it.) The pathkeys value passed to query_planner - * has not yet been "canonicalized", since the necessary info does not get - * computed until subplanner() scans the qual clauses. We canonicalize it - * inside subplanner() as soon as that task is done. The output value - * will be in canonical form as well. + * Output parameters: + * *cheapest_path receives the overall-cheapest path for the query + * *sorted_path receives the cheapest presorted path for the query, + * if any (it may be NULL, or the same as cheapest_path) + * + * Note: the Query node also includes a query_pathkeys field, which is both + * an input and an output of query_planner(). The input value signals + * query_planner that the indicated sort order is wanted in the final output + * plan. But this value has not yet been "canonicalized", since the needed + * info does not get computed until we scan the qual clauses. We canonicalize + * it as soon as that task is done. (The main reason query_pathkeys is a + * Query field and not a passed parameter is that the low-level routines in + * indxpath.c need to see it.) * * tuple_fraction is interpreted as follows: * 0 (or less): expect all tuples to be retrieved (normal case) @@ -66,18 +70,14 @@ static Plan *subplanner(Query *root, List *flat_tlist, * Note that while this routine and its subroutines treat a negative * tuple_fraction the same as 0, grouping_planner has a different * interpretation. - * - * Returns a query plan. *-------------------- */ -Plan * -query_planner(Query *root, - List *tlist, - double tuple_fraction) +void +query_planner(Query *root, List *tlist, double tuple_fraction, + Path **cheapest_path, Path **sorted_path) { List *constant_quals; - List *var_only_tlist; - Plan *subplan; + RelOptInfo *final_rel; /* * If the query has an empty join tree, then it's something easy like @@ -85,11 +85,10 @@ query_planner(Query *root, */ if (root->jointree->fromlist == NIL) { - root->query_pathkeys = NIL; /* signal unordered result */ - - /* Make childless Result node to evaluate given tlist. */ - return (Plan *) make_result(tlist, root->jointree->quals, - (Plan *) NULL); + *cheapest_path = (Path *) create_result_path(NULL, NULL, + (List *) root->jointree->quals); + *sorted_path = NULL; + return; } /* @@ -107,80 +106,8 @@ query_planner(Query *root, &constant_quals); /* - * Create a target list that consists solely of (resdom var) target - * list entries, i.e., contains no arbitrary expressions. - * - * All subplan nodes will have "flat" (var-only) tlists. - * - * This implies that all expression evaluations are done at the root of - * the plan tree. Once upon a time there was code to try to push - * expensive function calls down to lower plan nodes, but that's dead - * code and has been for a long time... - */ - var_only_tlist = flatten_tlist(tlist); - - /* - * Choose the best access path and build a plan for it. + * init planner lists to empty */ - subplan = subplanner(root, var_only_tlist, tuple_fraction); - - /* - * Build a result node to control the plan if we have constant quals, - * or if the top-level plan node is one that cannot do expression - * evaluation (it won't be able to evaluate the requested tlist). - * Currently, the only plan node we might see here that falls into - * that category is Append. - * - * XXX future improvement: if the given tlist is flat anyway, we don't - * really need a Result node. - */ - if (constant_quals || IsA(subplan, Append)) - { - /* - * The result node will also be responsible for evaluating the - * originally requested tlist. - */ - subplan = (Plan *) make_result(tlist, - (Node *) constant_quals, - subplan); - } - else - { - /* - * Replace the toplevel plan node's flattened target list with the - * targetlist given by my caller, so that expressions are - * evaluated. - */ - subplan->targetlist = tlist; - } - - return subplan; -} - -/* - * subplanner - * - * Subplanner creates an entire plan consisting of joins and scans - * for processing a single level of attributes. - * - * flat_tlist is the flattened target list - * tuple_fraction is the fraction of tuples we expect will be retrieved - * - * See query_planner() comments about the interpretation of tuple_fraction. - * - * Returns a subplan. - */ -static Plan * -subplanner(Query *root, - List *flat_tlist, - double tuple_fraction) -{ - RelOptInfo *final_rel; - Plan *resultplan; - Path *cheapestpath; - Path *presortedpath; - - /* init lists to empty */ root->base_rel_list = NIL; root->other_rel_list = NIL; root->join_rel_list = NIL; @@ -197,8 +124,14 @@ subplanner(Query *root, * clauses are added to appropriate lists belonging to the mentioned * relations. We also build lists of equijoined keys for pathkey * construction. + * + * Note: all subplan nodes will have "flat" (var-only) tlists. + * This implies that all expression evaluations are done at the root of + * the plan tree. Once upon a time there was code to try to push + * expensive function calls down to lower plan nodes, but that's dead + * code and has been for a long time... */ - build_base_rel_tlists(root, flat_tlist); + build_base_rel_tlists(root, tlist); (void) distribute_quals_to_rels(root, (Node *) root->jointree); @@ -220,31 +153,8 @@ subplanner(Query *root, */ final_rel = make_one_rel(root); - if (!final_rel) - elog(ERROR, "subplanner: failed to construct a relation"); - -#ifdef NOT_USED /* fix xfunc */ - - /* - * Perform Predicate Migration on each path, to optimize and correctly - * assess the cost of each before choosing the cheapest one. -- JMH, - * 11/16/92 - * - * Needn't do so if the top rel is pruneable: that means there's no - * expensive functions left to pull up. -- JMH, 11/22/92 - */ - if (XfuncMode != XFUNC_OFF && XfuncMode != XFUNC_NOPM && - XfuncMode != XFUNC_NOPULL && !final_rel->pruneable) - { - List *pathnode; - - foreach(pathnode, final_rel->pathlist) - { - if (xfunc_do_predmig((Path *) lfirst(pathnode))) - set_cheapest(final_rel); - } - } -#endif + if (!final_rel || !final_rel->cheapest_total_path) + elog(ERROR, "query_planner: failed to construct a relation"); /* * Now that we have an estimate of the final rel's size, we can @@ -255,75 +165,35 @@ subplanner(Query *root, tuple_fraction /= final_rel->rows; /* - * Determine the cheapest path, independently of any ordering - * considerations. We do, however, take into account whether the - * whole plan is expected to be evaluated or not. + * Pick out the cheapest-total path and the cheapest presorted path + * for the requested pathkeys (if there is one). We can take the + * tuple fraction into account when selecting the cheapest presorted + * path, but not when selecting the cheapest-total path, since if we + * have to sort then we'll have to fetch all the tuples. (But there's + * a special case: if query_pathkeys is NIL, meaning order doesn't + * matter, then the "cheapest presorted" path will be the cheapest + * overall for the tuple fraction.) */ - if (tuple_fraction <= 0.0 || tuple_fraction >= 1.0) - cheapestpath = final_rel->cheapest_total_path; - else - cheapestpath = - get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, - NIL, - tuple_fraction); + *cheapest_path = final_rel->cheapest_total_path; - Assert(cheapestpath != NULL); - - /* - * Select the best path and create a subplan to execute it. - * - * If no special sort order is wanted, or if the cheapest path is already - * appropriately ordered, we use the cheapest path found above. - */ - if (root->query_pathkeys == NIL || - pathkeys_contained_in(root->query_pathkeys, - cheapestpath->pathkeys)) - { - root->query_pathkeys = cheapestpath->pathkeys; - resultplan = create_plan(root, cheapestpath); - goto plan_built; - } - - /* - * Otherwise, look to see if we have an already-ordered path that is - * cheaper than doing an explicit sort on the cheapest-total-cost - * path. - */ - cheapestpath = final_rel->cheapest_total_path; - presortedpath = + *sorted_path = get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, root->query_pathkeys, tuple_fraction); - if (presortedpath) - { - Path sort_path; /* dummy for result of cost_sort */ - - cost_sort(&sort_path, root, root->query_pathkeys, - final_rel->rows, final_rel->width); - sort_path.startup_cost += cheapestpath->total_cost; - sort_path.total_cost += cheapestpath->total_cost; - if (compare_fractional_path_costs(presortedpath, &sort_path, - tuple_fraction) <= 0) - { - /* Presorted path is cheaper, use it */ - root->query_pathkeys = presortedpath->pathkeys; - resultplan = create_plan(root, presortedpath); - goto plan_built; - } - /* otherwise, doing it the hard way is still cheaper */ - } /* - * Nothing for it but to sort the cheapest-total-cost path --- but we - * let the caller do that. grouping_planner has to be able to add a - * sort node anyway, so no need for extra code here. (Furthermore, - * the given pathkeys might involve something we can't compute here, - * such as an aggregate function...) + * If we have constant quals, add a toplevel Result step to process them. */ - root->query_pathkeys = cheapestpath->pathkeys; - resultplan = create_plan(root, cheapestpath); - -plan_built: - - return resultplan; + if (constant_quals) + { + *cheapest_path = (Path *) + create_result_path((*cheapest_path)->parent, + *cheapest_path, + constant_quals); + if (*sorted_path) + *sorted_path = (Path *) + create_result_path((*sorted_path)->parent, + *sorted_path, + constant_quals); + } } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b607173a4c3..cc8e7a698d5 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.125 2002/09/24 18:38:23 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.126 2002/11/06 00:00:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,6 +21,8 @@ #include "nodes/print.h" #endif #include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/planner.h" @@ -53,10 +55,10 @@ static Plan *inheritance_planner(Query *parse, List *inheritlist); static Plan *grouping_planner(Query *parse, double tuple_fraction); static List *make_subplanTargetList(Query *parse, List *tlist, AttrNumber **groupColIdx); -static Plan *make_groupplan(Query *parse, - List *group_tlist, bool tuplePerGroup, - List *groupClause, AttrNumber *grpColIdx, - bool is_presorted, Plan *subplan); +static Plan *make_groupsortplan(Query *parse, + List *groupClause, + AttrNumber *grpColIdx, + Plan *subplan); static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist); @@ -877,9 +879,7 @@ grouping_planner(Query *parse, double tuple_fraction) List *tlist = parse->targetList; Plan *result_plan; List *current_pathkeys; - List *group_pathkeys; List *sort_pathkeys; - AttrNumber *groupColIdx = NULL; if (parse->setOperations) { @@ -917,17 +917,20 @@ grouping_planner(Query *parse, double tuple_fraction) current_pathkeys = NIL; /* - * Calculate pathkeys that represent grouping/ordering - * requirements (grouping should always be null, but...) + * Calculate pathkeys that represent ordering requirements */ - group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause, - tlist); sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause, tlist); + sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys); } else { + /* No set operations, do regular planning */ List *sub_tlist; + List *group_pathkeys; + AttrNumber *groupColIdx = NULL; + Path *cheapest_path; + Path *sorted_path; /* Preprocess targetlist in case we are inside an INSERT/UPDATE. */ tlist = preprocess_targetlist(tlist, @@ -1192,99 +1195,162 @@ grouping_planner(Query *parse, double tuple_fraction) tuple_fraction = 0.25; } - /* Generate the basic plan for this Query */ - result_plan = query_planner(parse, - sub_tlist, - tuple_fraction); - /* - * query_planner returns actual sort order (which is not - * necessarily what we requested) in query_pathkeys. + * Generate the best unsorted and presorted paths for this Query + * (but note there may not be any presorted path). */ - current_pathkeys = parse->query_pathkeys; - } - - /* - * We couldn't canonicalize group_pathkeys and sort_pathkeys before - * running query_planner(), so do it now. - */ - group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys); - sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys); - - /* - * If we have a GROUP BY clause, insert a group node (plus the - * appropriate sort node, if necessary). - */ - if (parse->groupClause) - { - bool tuplePerGroup; - List *group_tlist; - bool is_sorted; + query_planner(parse, sub_tlist, tuple_fraction, + &cheapest_path, &sorted_path); /* - * Decide whether how many tuples per group the Group node needs - * to return. (Needs only one tuple per group if no aggregate is - * present. Otherwise, need every tuple from the group to do the - * aggregation.) Note tuplePerGroup is named backwards :-( + * We couldn't canonicalize group_pathkeys and sort_pathkeys before + * running query_planner(), so do it now. */ - tuplePerGroup = parse->hasAggs; + group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys); + sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys); /* - * If there are aggregates then the Group node should just return - * the same set of vars as the subplan did. If there are no aggs - * then the Group node had better compute the final tlist. + * Select the best path and create a plan to execute it. + * + * If no special sort order is wanted, or if the cheapest path is + * already appropriately ordered, use the cheapest path. + * Otherwise, look to see if we have an already-ordered path that is + * cheaper than doing an explicit sort on the cheapest-total-cost + * path. */ - if (parse->hasAggs) - group_tlist = new_unsorted_tlist(result_plan->targetlist); + if (parse->query_pathkeys == NIL || + pathkeys_contained_in(parse->query_pathkeys, + cheapest_path->pathkeys)) + { + result_plan = create_plan(parse, cheapest_path); + current_pathkeys = cheapest_path->pathkeys; + } + else if (sorted_path) + { + Path sort_path; /* dummy for result of cost_sort */ + + cost_sort(&sort_path, parse, parse->query_pathkeys, + sorted_path->parent->rows, sorted_path->parent->width); + sort_path.startup_cost += cheapest_path->total_cost; + sort_path.total_cost += cheapest_path->total_cost; + if (compare_fractional_path_costs(sorted_path, &sort_path, + tuple_fraction) <= 0) + { + /* Presorted path is cheaper, use it */ + result_plan = create_plan(parse, sorted_path); + current_pathkeys = sorted_path->pathkeys; + } + else + { + /* otherwise, doing it the hard way is still cheaper */ + result_plan = create_plan(parse, cheapest_path); + current_pathkeys = cheapest_path->pathkeys; + } + } else - group_tlist = tlist; + { + /* + * No sorted path, so we must use the cheapest-total path. + * The actual sort step will be generated below. + */ + result_plan = create_plan(parse, cheapest_path); + current_pathkeys = cheapest_path->pathkeys; + } /* - * Figure out whether the path result is already ordered the way - * we need it --- if so, no need for an explicit sort step. + * create_plan() returns a plan with just a "flat" tlist of required + * Vars. We want to insert the sub_tlist as the tlist of the top + * plan node. If the top-level plan node is one that cannot do + * expression evaluation, we must insert a Result node to project the + * desired tlist. + * Currently, the only plan node we might see here that falls into + * that category is Append. */ - if (pathkeys_contained_in(group_pathkeys, current_pathkeys)) + if (IsA(result_plan, Append)) { - is_sorted = true; /* no sort needed now */ - /* current_pathkeys remains unchanged */ + result_plan = (Plan *) make_result(sub_tlist, NULL, result_plan); } else { /* - * We will need to do an explicit sort by the GROUP BY clause. - * make_groupplan will do the work, but set current_pathkeys - * to indicate the resulting order. + * Otherwise, just replace the flat tlist with the desired tlist. */ - is_sorted = false; - current_pathkeys = group_pathkeys; + result_plan->targetlist = sub_tlist; } - result_plan = make_groupplan(parse, - group_tlist, - tuplePerGroup, - parse->groupClause, - groupColIdx, - is_sorted, - result_plan); - } + /* + * If any aggregate is present, insert the Agg node, plus an explicit + * sort if necessary. + * + * HAVING clause, if any, becomes qual of the Agg node + */ + if (parse->hasAggs) + { + AggStrategy aggstrategy; - /* - * If aggregate is present, insert the Agg node - * - * HAVING clause, if any, becomes qual of the Agg node - */ - if (parse->hasAggs) - { - result_plan = (Plan *) make_agg(tlist, - (List *) parse->havingQual, - result_plan); - /* Note: Agg does not affect any existing sort order of the tuples */ - } - else - { - /* If there are no Aggs, we shouldn't have any HAVING qual anymore */ - Assert(parse->havingQual == NULL); - } + if (parse->groupClause) + { + aggstrategy = AGG_SORTED; + /* + * Add an explicit sort if we couldn't make the path come out + * the way the AGG node needs it. + */ + if (!pathkeys_contained_in(group_pathkeys, current_pathkeys)) + { + result_plan = make_groupsortplan(parse, + parse->groupClause, + groupColIdx, + result_plan); + current_pathkeys = group_pathkeys; + } + } + else + aggstrategy = AGG_PLAIN; + + result_plan = (Plan *) make_agg(tlist, + (List *) parse->havingQual, + aggstrategy, + length(parse->groupClause), + groupColIdx, + result_plan); + /* + * Note: plain or grouped Agg does not affect any existing + * sort order of the tuples + */ + } + else + { + /* + * If there are no Aggs, we shouldn't have any HAVING qual anymore + */ + Assert(parse->havingQual == NULL); + + /* + * If we have a GROUP BY clause, insert a group node (plus the + * appropriate sort node, if necessary). + */ + if (parse->groupClause) + { + /* + * Add an explicit sort if we couldn't make the path come out + * the way the GROUP node needs it. + */ + if (!pathkeys_contained_in(group_pathkeys, current_pathkeys)) + { + result_plan = make_groupsortplan(parse, + parse->groupClause, + groupColIdx, + result_plan); + current_pathkeys = group_pathkeys; + } + + result_plan = (Plan *) make_group(tlist, + length(parse->groupClause), + groupColIdx, + result_plan); + } + } + } /* end of if (setOperations) */ /* * If we were not able to make the plan come out in the right order, @@ -1323,7 +1389,7 @@ grouping_planner(Query *parse, double tuple_fraction) * make_subplanTargetList * Generate appropriate target list when grouping is required. * - * When grouping_planner inserts Aggregate and/or Group plan nodes above + * When grouping_planner inserts Aggregate or Group plan nodes above * the result of query_planner, we typically want to pass a different * target list to query_planner than the outer plan nodes should have. * This routine generates the correct target list for the subplan. @@ -1433,62 +1499,48 @@ make_subplanTargetList(Query *parse, } /* - * make_groupplan - * Add a Group node for GROUP BY processing. - * If we couldn't make the subplan produce presorted output for grouping, - * first add an explicit Sort node. + * make_groupsortplan + * Add a Sort node to explicitly sort according to the GROUP BY clause. + * + * Note: the Sort node always just takes a copy of the subplan's tlist + * plus ordering information. (This might seem inefficient if the + * subplan contains complex GROUP BY expressions, but in fact Sort + * does not evaluate its targetlist --- it only outputs the same + * tuples in a new order. So the expressions we might be copying + * are just dummies with no extra execution cost.) */ static Plan * -make_groupplan(Query *parse, - List *group_tlist, - bool tuplePerGroup, - List *groupClause, - AttrNumber *grpColIdx, - bool is_presorted, - Plan *subplan) +make_groupsortplan(Query *parse, + List *groupClause, + AttrNumber *grpColIdx, + Plan *subplan) { - int numCols = length(groupClause); + List *sort_tlist = new_unsorted_tlist(subplan->targetlist); + int keyno = 0; + List *gl; - if (!is_presorted) + foreach(gl, groupClause) { + GroupClause *grpcl = (GroupClause *) lfirst(gl); + TargetEntry *te = nth(grpColIdx[keyno] - 1, sort_tlist); + Resdom *resdom = te->resdom; + /* - * The Sort node always just takes a copy of the subplan's tlist - * plus ordering information. (This might seem inefficient if the - * subplan contains complex GROUP BY expressions, but in fact Sort - * does not evaluate its targetlist --- it only outputs the same - * tuples in a new order. So the expressions we might be copying - * are just dummies with no extra execution cost.) + * Check for the possibility of duplicate group-by clauses --- + * the parser should have removed 'em, but the Sort executor + * will get terribly confused if any get through! */ - List *sort_tlist = new_unsorted_tlist(subplan->targetlist); - int keyno = 0; - List *gl; - - foreach(gl, groupClause) + if (resdom->reskey == 0) { - GroupClause *grpcl = (GroupClause *) lfirst(gl); - TargetEntry *te = nth(grpColIdx[keyno] - 1, sort_tlist); - Resdom *resdom = te->resdom; - - /* - * Check for the possibility of duplicate group-by clauses --- - * the parser should have removed 'em, but the Sort executor - * will get terribly confused if any get through! - */ - if (resdom->reskey == 0) - { - /* OK, insert the ordering info needed by the executor. */ - resdom->reskey = ++keyno; - resdom->reskeyop = grpcl->sortop; - } + /* OK, insert the ordering info needed by the executor. */ + resdom->reskey = ++keyno; + resdom->reskeyop = grpcl->sortop; } - - Assert(keyno > 0); - - subplan = (Plan *) make_sort(parse, sort_tlist, subplan, keyno); } - return (Plan *) make_group(group_tlist, tuplePerGroup, numCols, - grpColIdx, subplan); + Assert(keyno > 0); + + return (Plan *) make_sort(parse, sort_tlist, subplan, keyno); } /* diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 4b3c9809b8b..7dd0dce6891 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.78 2002/06/20 20:29:31 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.79 2002/11/06 00:00:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -405,7 +405,6 @@ create_tidscan_path(Query *root, RelOptInfo *rel, List *tideval) * create_append_path * Creates a path corresponding to an Append plan, returning the * pathnode. - * */ AppendPath * create_append_path(RelOptInfo *rel, List *subpaths) @@ -434,6 +433,41 @@ create_append_path(RelOptInfo *rel, List *subpaths) } /* + * create_result_path + * Creates a path corresponding to a Result plan, returning the + * pathnode. + */ +ResultPath * +create_result_path(RelOptInfo *rel, Path *subpath, List *constantqual) +{ + ResultPath *pathnode = makeNode(ResultPath); + + pathnode->path.pathtype = T_Result; + pathnode->path.parent = rel; /* may be NULL */ + + if (subpath) + pathnode->path.pathkeys = subpath->pathkeys; + else + pathnode->path.pathkeys = NIL; + + pathnode->subpath = subpath; + pathnode->constantqual = constantqual; + + if (subpath) + { + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost; + } + else + { + pathnode->path.startup_cost = 0; + pathnode->path.total_cost = cpu_tuple_cost; + } + + return pathnode; +} + +/* * create_subqueryscan_path * Creates a path corresponding to a sequential scan of a subquery, * returning the pathnode. |