diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2002-11-21 00:42:20 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-11-21 00:42:20 +0000 |
commit | 6c1d4662afc6344ea7d98b5d1b248214ea0c7635 (patch) | |
tree | e6b29fb9ae4aeff58eb35947255375d2085bc709 /src/backend/optimizer/path/costsize.c | |
parent | 2676e11fdffb3ea1c56de9e22be4fb80b902f7fa (diff) |
Finish implementation of hashed aggregation. Add enable_hashagg GUC
parameter to allow it to be forced off for comparison purposes.
Add ORDER BY clauses to a bunch of regression test queries that will
otherwise produce randomly-ordered output in the new regime.
Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 106 |
1 files changed, 97 insertions, 9 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 9f987a4395b..6cf8b2af4b5 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.90 2002/09/04 20:31:20 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.91 2002/11/21 00:42:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -79,6 +79,7 @@ bool enable_seqscan = true; bool enable_indexscan = true; bool enable_tidscan = true; bool enable_sort = true; +bool enable_hashagg = true; bool enable_nestloop = true; bool enable_mergejoin = true; bool enable_hashjoin = true; @@ -423,10 +424,8 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel) /* * cost_sort - * Determines and returns the cost of sorting a relation. - * - * The cost of supplying the input data is NOT included; the caller should - * add that cost to both startup and total costs returned from this routine! + * Determines and returns the cost of sorting a relation, including + * the cost of reading the input data. * * If the total volume of data to sort is less than SortMem, we will do * an in-memory sort, which requires no I/O and about t*log2(t) tuple @@ -449,6 +448,7 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel) * the right ballpark in most cases. * * 'pathkeys' is a list of sort keys + * 'input_cost' is the total cost for reading the input data * 'tuples' is the number of tuples in the relation * 'width' is the average tuple width in bytes * @@ -456,12 +456,14 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel) * can't conveniently supply the sort keys. Since this routine doesn't * currently do anything with pathkeys anyway, that doesn't matter... * but if it ever does, it should react gracefully to lack of key data. + * (Actually, the thing we'd most likely be interested in is just the number + * of sort keys, which all callers *could* supply.) */ void cost_sort(Path *path, Query *root, - List *pathkeys, double tuples, int width) + List *pathkeys, Cost input_cost, double tuples, int width) { - Cost startup_cost = 0; + Cost startup_cost = input_cost; Cost run_cost = 0; double nbytes = relation_byte_size(tuples, width); long sortmembytes = SortMem * 1024L; @@ -511,6 +513,92 @@ cost_sort(Path *path, Query *root, path->total_cost = startup_cost + run_cost; } +/* + * cost_agg + * Determines and returns the cost of performing an Agg plan node, + * including the cost of its input. + * + * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs + * are for appropriately-sorted input. + */ +void +cost_agg(Path *path, Query *root, + AggStrategy aggstrategy, int numAggs, + int numGroupCols, double numGroups, + Cost input_startup_cost, Cost input_total_cost, + double input_tuples) +{ + Cost startup_cost; + Cost total_cost; + + /* + * We charge one cpu_operator_cost per aggregate function per input + * tuple, and another one per output tuple (corresponding to transfn + * and finalfn calls respectively). If we are grouping, we charge an + * additional cpu_operator_cost per grouping column per input tuple + * for grouping comparisons. + * + * We will produce a single output tuple if not grouping, + * and a tuple per group otherwise. + */ + if (aggstrategy == AGG_PLAIN) + { + startup_cost = input_total_cost; + startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs; + /* we aren't grouping */ + total_cost = startup_cost; + } + else if (aggstrategy == AGG_SORTED) + { + /* Here we are able to deliver output on-the-fly */ + startup_cost = input_startup_cost; + total_cost = input_total_cost; + total_cost += cpu_operator_cost * (input_tuples + numGroups) * numAggs; + total_cost += cpu_operator_cost * input_tuples * numGroupCols; + } + else + { + /* must be AGG_HASHED */ + startup_cost = input_total_cost; + startup_cost += cpu_operator_cost * input_tuples * numAggs; + startup_cost += cpu_operator_cost * input_tuples * numGroupCols; + total_cost = startup_cost; + total_cost += cpu_operator_cost * numGroups * numAggs; + } + + path->startup_cost = startup_cost; + path->total_cost = total_cost; +} + +/* + * cost_group + * Determines and returns the cost of performing a Group plan node, + * including the cost of its input. + * + * Note: caller must ensure that input costs are for appropriately-sorted + * input. + */ +void +cost_group(Path *path, Query *root, + int numGroupCols, double numGroups, + Cost input_startup_cost, Cost input_total_cost, + double input_tuples) +{ + Cost startup_cost; + Cost total_cost; + + startup_cost = input_startup_cost; + total_cost = input_total_cost; + + /* + * Charge one cpu_operator_cost per comparison per input tuple. We + * assume all columns get compared at most of the tuples. + */ + total_cost += cpu_operator_cost * input_tuples * numGroupCols; + + path->startup_cost = startup_cost; + path->total_cost = total_cost; +} /* * cost_nestloop @@ -658,10 +746,10 @@ cost_mergejoin(Path *path, Query *root, */ if (outersortkeys) /* do we need to sort outer? */ { - startup_cost += outer_path->total_cost; cost_sort(&sort_path, root, outersortkeys, + outer_path->total_cost, outer_path->parent->rows, outer_path->parent->width); startup_cost += sort_path.startup_cost; @@ -677,10 +765,10 @@ cost_mergejoin(Path *path, Query *root, if (innersortkeys) /* do we need to sort inner? */ { - startup_cost += inner_path->total_cost; cost_sort(&sort_path, root, innersortkeys, + inner_path->total_cost, inner_path->parent->rows, inner_path->parent->width); startup_cost += sort_path.startup_cost; |