diff options
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 3 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planagg.c | 69 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 66 | ||||
-rw-r--r-- | src/backend/optimizer/prep/Makefile | 1 | ||||
-rw-r--r-- | src/backend/optimizer/prep/prepagg.c | 678 | ||||
-rw-r--r-- | src/backend/optimizer/util/clauses.c | 288 |
6 files changed, 729 insertions, 376 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index f1dfdc1a4a1..22d6935824a 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2439,7 +2439,8 @@ cost_agg(Path *path, PlannerInfo *root, * than or equal to one, all groups are expected to fit in memory; * otherwise we expect to spill. */ - hashentrysize = hash_agg_entry_size(aggcosts->numAggs, input_width, + hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos), + input_width, aggcosts->transitionSpace); hash_agg_set_limits(hashentrysize, numGroups, 0, &mem_limit, &ngroups_limit, &num_partitions); diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index 8634940efc1..48c4fee8923 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -47,7 +47,7 @@ #include "utils/lsyscache.h" #include "utils/syscache.h" -static bool find_minmax_aggs_walker(Node *node, List **context); +static bool can_minmax_aggs(PlannerInfo *root, List **context); static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, Oid eqop, Oid sortop, bool nulls_first); static void minmax_qp_callback(PlannerInfo *root, void *extra); @@ -66,7 +66,8 @@ static Oid fetch_agg_sort_op(Oid aggfnoid); * query_planner(), because we generate indexscan paths by cloning the * planner's state and invoking query_planner() on a modified version of * the query parsetree. Thus, all preprocessing needed before query_planner() - * must already be done. + * must already be done. This relies on the list of aggregates in + * root->agginfos, so preprocess_aggrefs() must have been called already, too. */ void preprocess_minmax_aggregates(PlannerInfo *root) @@ -140,9 +141,7 @@ preprocess_minmax_aggregates(PlannerInfo *root) * all are MIN/MAX aggregates. Stop as soon as we find one that isn't. */ aggs_list = NIL; - if (find_minmax_aggs_walker((Node *) root->processed_tlist, &aggs_list)) - return; - if (find_minmax_aggs_walker(parse->havingQual, &aggs_list)) + if (!can_minmax_aggs(root, &aggs_list)) return; /* @@ -227,38 +226,33 @@ preprocess_minmax_aggregates(PlannerInfo *root) } /* - * find_minmax_aggs_walker - * Recursively scan the Aggref nodes in an expression tree, and check - * that each one is a MIN/MAX aggregate. If so, build a list of the + * can_minmax_aggs + * Walk through all the aggregates in the query, and check + * if they are all MIN/MAX aggregates. If so, build a list of the * distinct aggregate calls in the tree. * - * Returns true if a non-MIN/MAX aggregate is found, false otherwise. - * (This seemingly-backward definition is used because expression_tree_walker - * aborts the scan on true return, which is what we want.) - * - * Found aggregates are added to the list at *context; it's up to the caller - * to initialize the list to NIL. + * Returns false if a non-MIN/MAX aggregate is found, true otherwise. * * This does not descend into subqueries, and so should be used only after * reduction of sublinks to subplans. There mustn't be outer-aggregate * references either. */ static bool -find_minmax_aggs_walker(Node *node, List **context) +can_minmax_aggs(PlannerInfo *root, List **context) { - if (node == NULL) - return false; - if (IsA(node, Aggref)) + ListCell *lc; + + foreach(lc, root->agginfos) { - Aggref *aggref = (Aggref *) node; + AggInfo *agginfo = (AggInfo *) lfirst(lc); + Aggref *aggref = agginfo->representative_aggref; Oid aggsortop; TargetEntry *curTarget; MinMaxAggInfo *mminfo; - ListCell *l; Assert(aggref->agglevelsup == 0); if (list_length(aggref->args) != 1) - return true; /* it couldn't be MIN/MAX */ + return false; /* it couldn't be MIN/MAX */ /* * ORDER BY is usually irrelevant for MIN/MAX, but it can change the @@ -274,7 +268,7 @@ find_minmax_aggs_walker(Node *node, List **context) * quickly. */ if (aggref->aggorder != NIL) - return true; + return false; /* note: we do not care if DISTINCT is mentioned ... */ /* @@ -283,30 +277,19 @@ find_minmax_aggs_walker(Node *node, List **context) * now, just punt. */ if (aggref->aggfilter != NULL) - return true; + return false; aggsortop = fetch_agg_sort_op(aggref->aggfnoid); if (!OidIsValid(aggsortop)) - return true; /* not a MIN/MAX aggregate */ + return false; /* not a MIN/MAX aggregate */ curTarget = (TargetEntry *) linitial(aggref->args); if (contain_mutable_functions((Node *) curTarget->expr)) - return true; /* not potentially indexable */ + return false; /* not potentially indexable */ if (type_is_rowtype(exprType((Node *) curTarget->expr))) - return true; /* IS NOT NULL would have weird semantics */ - - /* - * Check whether it's already in the list, and add it if not. - */ - foreach(l, *context) - { - mminfo = (MinMaxAggInfo *) lfirst(l); - if (mminfo->aggfnoid == aggref->aggfnoid && - equal(mminfo->target, curTarget->expr)) - return false; - } + return false; /* IS NOT NULL would have weird semantics */ mminfo = makeNode(MinMaxAggInfo); mminfo->aggfnoid = aggref->aggfnoid; @@ -318,16 +301,8 @@ find_minmax_aggs_walker(Node *node, List **context) mminfo->param = NULL; *context = lappend(*context, mminfo); - - /* - * We need not recurse into the argument, since it can't contain any - * aggregates. - */ - return false; } - Assert(!IsA(node, SubLink)); - return expression_tree_walker(node, find_minmax_aggs_walker, - (void *) context); + return true; } /* @@ -368,6 +343,8 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, subroot->plan_params = NIL; subroot->outer_params = NULL; subroot->init_plans = NIL; + subroot->agginfos = NIL; + subroot->aggtransinfos = NIL; subroot->parse = parse = copyObject(root->parse); IncrementVarSublevelsUp((Node *) parse, 1, 1); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 986d7a52e32..247f7d46252 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -152,7 +152,6 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, bool target_parallel_safe, - const AggClauseCosts *agg_costs, grouping_sets_data *gd); static bool is_degenerate_grouping(PlannerInfo *root); static void create_degenerate_grouping_paths(PlannerInfo *root, @@ -228,8 +227,7 @@ static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root, GroupPathExtraData *extra, bool force_rel_creation); static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel); -static bool can_partial_agg(PlannerInfo *root, - const AggClauseCosts *agg_costs); +static bool can_partial_agg(PlannerInfo *root); static void apply_scanjoin_target_to_paths(PlannerInfo *root, RelOptInfo *rel, List *scanjoin_targets, @@ -1944,7 +1942,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, bool scanjoin_target_parallel_safe; bool scanjoin_target_same_exprs; bool have_grouping; - AggClauseCosts agg_costs; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; grouping_sets_data *gset_data = NULL; @@ -1975,25 +1972,16 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, root->processed_tlist = preprocess_targetlist(root); /* - * Collect statistics about aggregates for estimating costs, and mark - * all the aggregates with resolved aggtranstypes. We must do this - * before slicing and dicing the tlist into various pathtargets, else - * some copies of the Aggref nodes might escape being marked with the - * correct transtypes. - * - * Note: currently, we do not detect duplicate aggregates here. This - * may result in somewhat-overestimated cost, which is fine for our - * purposes since all Paths will get charged the same. But at some - * point we might wish to do that detection in the planner, rather - * than during executor startup. + * Mark all the aggregates with resolved aggtranstypes, and detect + * aggregates that are duplicates or can share transition state. We + * must do this before slicing and dicing the tlist into various + * pathtargets, else some copies of the Aggref nodes might escape + * being marked. */ - MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); if (parse->hasAggs) { - get_agg_clause_costs(root, (Node *) root->processed_tlist, - AGGSPLIT_SIMPLE, &agg_costs); - get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE, - &agg_costs); + preprocess_aggrefs(root, (Node *) root->processed_tlist); + preprocess_aggrefs(root, (Node *) parse->havingQual); } /* @@ -2198,7 +2186,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel, grouping_target, grouping_target_parallel_safe, - &agg_costs, gset_data); /* Fix things up if grouping_target contains SRFs */ if (parse->hasTargetSRFs) @@ -3790,7 +3777,6 @@ get_number_of_groups(PlannerInfo *root, * * input_rel: contains the source-data Paths * target: the pathtarget for the result Paths to compute - * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode) * gd: grouping sets data including list of grouping sets and their clauses * * Note: all Paths in input_rel are expected to return the target computed @@ -3801,12 +3787,15 @@ create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, bool target_parallel_safe, - const AggClauseCosts *agg_costs, grouping_sets_data *gd) { Query *parse = root->parse; RelOptInfo *grouped_rel; RelOptInfo *partially_grouped_rel; + AggClauseCosts agg_costs; + + MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); + get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs); /* * Create grouping relation to hold fully aggregated grouping and/or @@ -3862,14 +3851,14 @@ create_grouping_paths(PlannerInfo *root, * the other gating conditions, so we want to do it last. */ if ((parse->groupClause != NIL && - agg_costs->numOrderedAggs == 0 && + root->numOrderedAggs == 0 && (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)))) flags |= GROUPING_CAN_USE_HASH; /* * Determine whether partial aggregation is possible. */ - if (can_partial_agg(root, agg_costs)) + if (can_partial_agg(root)) flags |= GROUPING_CAN_PARTIAL_AGG; extra.flags = flags; @@ -3890,7 +3879,7 @@ create_grouping_paths(PlannerInfo *root, extra.patype = PARTITIONWISE_AGGREGATE_NONE; create_ordinary_grouping_paths(root, input_rel, grouped_rel, - agg_costs, gd, &extra, + &agg_costs, gd, &extra, &partially_grouped_rel); } @@ -4248,7 +4237,8 @@ consider_groupingsets_paths(PlannerInfo *root, l_start = lnext(gd->rollups, l_start); } - hashsize = estimate_hashagg_tablesize(path, + hashsize = estimate_hashagg_tablesize(root, + path, agg_costs, dNumGroups - exclude_groups); @@ -4382,7 +4372,8 @@ consider_groupingsets_paths(PlannerInfo *root, /* * Account first for space needed for groups we can't sort at all. */ - availspace -= estimate_hashagg_tablesize(path, + availspace -= estimate_hashagg_tablesize(root, + path, agg_costs, gd->dNumHashGroups); @@ -4433,7 +4424,8 @@ consider_groupingsets_paths(PlannerInfo *root, if (rollup->hashable) { - double sz = estimate_hashagg_tablesize(path, + double sz = estimate_hashagg_tablesize(root, + path, agg_costs, rollup->numGroups); @@ -6926,20 +6918,12 @@ create_partial_grouping_paths(PlannerInfo *root, MemSet(agg_final_costs, 0, sizeof(AggClauseCosts)); if (parse->hasAggs) { - List *partial_target_exprs; - /* partial phase */ - partial_target_exprs = partially_grouped_rel->reltarget->exprs; - get_agg_clause_costs(root, (Node *) partial_target_exprs, - AGGSPLIT_INITIAL_SERIAL, + get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL, agg_partial_costs); /* final phase */ - get_agg_clause_costs(root, (Node *) grouped_rel->reltarget->exprs, - AGGSPLIT_FINAL_DESERIAL, - agg_final_costs); - get_agg_clause_costs(root, extra->havingQual, - AGGSPLIT_FINAL_DESERIAL, + get_agg_clause_costs(root, AGGSPLIT_FINAL_DESERIAL, agg_final_costs); } @@ -7324,7 +7308,7 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) * Returns true when possible, false otherwise. */ static bool -can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs) +can_partial_agg(PlannerInfo *root) { Query *parse = root->parse; @@ -7341,7 +7325,7 @@ can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs) /* We don't know how to do grouping sets in parallel. */ return false; } - else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial) + else if (root->hasNonPartialAggs || root->hasNonSerialAggs) { /* Insufficient support for partial mode. */ return false; diff --git a/src/backend/optimizer/prep/Makefile b/src/backend/optimizer/prep/Makefile index 5733df45737..6f8c6c8208b 100644 --- a/src/backend/optimizer/prep/Makefile +++ b/src/backend/optimizer/prep/Makefile @@ -13,6 +13,7 @@ top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global OBJS = \ + prepagg.o \ prepjointree.o \ prepqual.o \ preptlist.o \ diff --git a/src/backend/optimizer/prep/prepagg.c b/src/backend/optimizer/prep/prepagg.c new file mode 100644 index 00000000000..34ac985a664 --- /dev/null +++ b/src/backend/optimizer/prep/prepagg.c @@ -0,0 +1,678 @@ +/*------------------------------------------------------------------------- + * + * prepagg.c + * Routines to preprocess aggregate function calls + * + * If there are identical aggregate calls in the query, they only need to + * be computed once. Also, some aggregate functions can share the same + * transition state, so that we only need to call the final function for + * them separately. These optimizations are independent of how the + * aggregates are executed. + * + * preprocess_aggrefs() detects those cases, creates AggInfo and + * AggTransInfo structs for each aggregate and transition state that needs + * to be computed, and sets the 'aggno' and 'transno' fields in the Aggrefs + * accordingly. It also resolves polymorphic transition types, and sets + * the 'aggtranstype' fields accordingly. + * + * XXX: The AggInfo and AggTransInfo structs are thrown away after + * planning, so executor startup has to perform some of the same lookups + * of transition functions and initial values that we do here. One day, we + * might want to carry that information to the Agg nodes to save the effort + * at executor startup. The Agg nodes are constructed much later in the + * planning, however, so it's not trivial. + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/prep/prepagg.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_type.h" +#include "nodes/nodeFuncs.h" +#include "nodes/pathnodes.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/plancat.h" +#include "optimizer/prep.h" +#include "parser/parse_agg.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/syscache.h" + +static bool preprocess_aggrefs_walker(Node *node, PlannerInfo *root); +static int find_compatible_agg(PlannerInfo *root, Aggref *newagg, + List **same_input_transnos); +static int find_compatible_trans(PlannerInfo *root, Aggref *newagg, + bool shareable, + Oid aggtransfn, Oid aggtranstype, + int transtypeLen, bool transtypeByVal, + Oid aggcombinefn, + Oid aggserialfn, Oid aggdeserialfn, + Datum initValue, bool initValueIsNull, + List *transnos); +static Datum GetAggInitVal(Datum textInitVal, Oid transtype); + +/* ----------------- + * Resolve the transition type of all Aggrefs, and determine which Aggrefs + * can share aggregate or transition state. + * + * Information about the aggregates and transition functions are collected + * in the root->agginfos and root->aggtransinfos lists. The 'aggtranstype', + * 'aggno', and 'aggtransno' fields in are filled in in each Aggref. + * + * NOTE: This modifies the Aggrefs in the input expression in-place! + * + * We try to optimize by detecting duplicate aggregate functions so that + * their state and final values are re-used, rather than needlessly being + * re-calculated independently. We also detect aggregates that are not + * the same, but which can share the same transition state. + * + * Scenarios: + * + * 1. Identical aggregate function calls appear in the query: + * + * SELECT SUM(x) FROM ... HAVING SUM(x) > 0 + * + * Since these aggregates are identical, we only need to calculate + * the value once. Both aggregates will share the same 'aggno' value. + * + * 2. Two different aggregate functions appear in the query, but the + * aggregates have the same arguments, transition functions and + * initial values (and, presumably, different final functions): + * + * SELECT AVG(x), STDDEV(x) FROM ... + * + * In this case we must create a new AggInfo for the varying aggregate, + * and we need to call the final functions separately, but we need + * only run the transition function once. (This requires that the + * final functions be nondestructive of the transition state, but + * that's required anyway for other reasons.) + * + * For either of these optimizations to be valid, all aggregate properties + * used in the transition phase must be the same, including any modifiers + * such as ORDER BY, DISTINCT and FILTER, and the arguments mustn't + * contain any volatile functions. + * ----------------- + */ +void +preprocess_aggrefs(PlannerInfo *root, Node *clause) +{ + (void) preprocess_aggrefs_walker(clause, root); +} + +static void +preprocess_aggref(Aggref *aggref, PlannerInfo *root) +{ + HeapTuple aggTuple; + Form_pg_aggregate aggform; + Oid aggtransfn; + Oid aggfinalfn; + Oid aggcombinefn; + Oid aggserialfn; + Oid aggdeserialfn; + Oid aggtranstype; + int32 aggtranstypmod; + int32 aggtransspace; + bool shareable; + int aggno; + int transno; + List *same_input_transnos; + int16 resulttypeLen; + bool resulttypeByVal; + Datum textInitVal; + Datum initValue; + bool initValueIsNull; + bool transtypeByVal; + int16 transtypeLen; + Oid inputTypes[FUNC_MAX_ARGS]; + int numArguments; + + Assert(aggref->agglevelsup == 0); + + /* + * Fetch info about the aggregate from pg_aggregate. Note it's correct to + * ignore the moving-aggregate variant, since what we're concerned with + * here is aggregates not window functions. + */ + aggTuple = SearchSysCache1(AGGFNOID, + ObjectIdGetDatum(aggref->aggfnoid)); + if (!HeapTupleIsValid(aggTuple)) + elog(ERROR, "cache lookup failed for aggregate %u", + aggref->aggfnoid); + aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); + aggtransfn = aggform->aggtransfn; + aggfinalfn = aggform->aggfinalfn; + aggcombinefn = aggform->aggcombinefn; + aggserialfn = aggform->aggserialfn; + aggdeserialfn = aggform->aggdeserialfn; + aggtranstype = aggform->aggtranstype; + aggtransspace = aggform->aggtransspace; + + /* + * Resolve the possibly-polymorphic aggregate transition type. + */ + + /* extract argument types (ignoring any ORDER BY expressions) */ + numArguments = get_aggregate_argtypes(aggref, inputTypes); + + /* resolve actual type of transition state, if polymorphic */ + aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid, + aggtranstype, + inputTypes, + numArguments); + aggref->aggtranstype = aggtranstype; + + /* + * If transition state is of same type as first aggregated input, assume + * it's the same typmod (same width) as well. This works for cases like + * MAX/MIN and is probably somewhat reasonable otherwise. + */ + aggtranstypmod = -1; + if (aggref->args) + { + TargetEntry *tle = (TargetEntry *) linitial(aggref->args); + + if (aggtranstype == exprType((Node *) tle->expr)) + aggtranstypmod = exprTypmod((Node *) tle->expr); + } + + /* + * If finalfn is marked read-write, we can't share transition states; but + * it is okay to share states for AGGMODIFY_SHAREABLE aggs. + * + * In principle, in a partial aggregate, we could share the transition + * state even if the final function is marked as read-write, because the + * partial aggregate doesn't execute the final function. But it's too + * early to know whether we're going perform a partial aggregate. + */ + shareable = (aggform->aggfinalmodify != AGGMODIFY_READ_WRITE); + + /* get info about the output value's datatype */ + get_typlenbyval(aggref->aggtype, + &resulttypeLen, + &resulttypeByVal); + + /* get initial value */ + textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple, + Anum_pg_aggregate_agginitval, + &initValueIsNull); + if (initValueIsNull) + initValue = (Datum) 0; + else + initValue = GetAggInitVal(textInitVal, aggtranstype); + + ReleaseSysCache(aggTuple); + + /* + * 1. See if this is identical to another aggregate function call that + * we've seen already. + */ + aggno = find_compatible_agg(root, aggref, &same_input_transnos); + if (aggno != -1) + { + AggInfo *agginfo = list_nth(root->agginfos, aggno); + + transno = agginfo->transno; + } + else + { + AggInfo *agginfo = palloc(sizeof(AggInfo)); + + agginfo->finalfn_oid = aggfinalfn; + agginfo->representative_aggref = aggref; + agginfo->shareable = shareable; + + aggno = list_length(root->agginfos); + root->agginfos = lappend(root->agginfos, agginfo); + + /* + * Count it, and check for cases requiring ordered input. Note that + * ordered-set aggs always have nonempty aggorder. Any ordered-input + * case also defeats partial aggregation. + */ + if (aggref->aggorder != NIL || aggref->aggdistinct != NIL) + { + root->numOrderedAggs++; + root->hasNonPartialAggs = true; + } + + get_typlenbyval(aggtranstype, + &transtypeLen, + &transtypeByVal); + + /* + * 2. See if this aggregate can share transition state with another + * aggregate that we've initialized already. + */ + transno = find_compatible_trans(root, aggref, shareable, + aggtransfn, aggtranstype, + transtypeLen, transtypeByVal, + aggcombinefn, + aggserialfn, aggdeserialfn, + initValue, initValueIsNull, + same_input_transnos); + if (transno == -1) + { + AggTransInfo *transinfo = palloc(sizeof(AggTransInfo)); + + transinfo->args = aggref->args; + transinfo->aggfilter = aggref->aggfilter; + transinfo->transfn_oid = aggtransfn; + transinfo->combinefn_oid = aggcombinefn; + transinfo->serialfn_oid = aggserialfn; + transinfo->deserialfn_oid = aggdeserialfn; + transinfo->aggtranstype = aggtranstype; + transinfo->aggtranstypmod = aggtranstypmod; + transinfo->transtypeLen = transtypeLen; + transinfo->transtypeByVal = transtypeByVal; + transinfo->aggtransspace = aggtransspace; + transinfo->initValue = initValue; + transinfo->initValueIsNull = initValueIsNull; + + transno = list_length(root->aggtransinfos); + root->aggtransinfos = lappend(root->aggtransinfos, transinfo); + + /* + * Check whether partial aggregation is feasible, unless we + * already found out that we can't do it. + */ + if (!root->hasNonPartialAggs) + { + /* + * If there is no combine function, then partial aggregation + * is not possible. + */ + if (!OidIsValid(transinfo->combinefn_oid)) + root->hasNonPartialAggs = true; + + /* + * If we have any aggs with transtype INTERNAL then we must + * check whether they have serialization/deserialization + * functions; if not, we can't serialize partial-aggregation + * results. + */ + else if (transinfo->aggtranstype == INTERNALOID && + (!OidIsValid(transinfo->serialfn_oid) || + !OidIsValid(transinfo->deserialfn_oid))) + root->hasNonSerialAggs = true; + } + } + agginfo->transno = transno; + } + + /* + * Fill in the fields in the Aggref (aggtranstype was set above already) + */ + aggref->aggno = aggno; + aggref->aggtransno = transno; +} + +static bool +preprocess_aggrefs_walker(Node *node, PlannerInfo *root) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Aggref *aggref = (Aggref *) node; + + preprocess_aggref(aggref, root); + + /* + * We assume that the parser checked that there are no aggregates (of + * this level anyway) in the aggregated arguments, direct arguments, + * or filter clause. Hence, we need not recurse into any of them. + */ + return false; + } + Assert(!IsA(node, SubLink)); + return expression_tree_walker(node, preprocess_aggrefs_walker, + (void *) root); +} + + +/* + * find_compatible_agg - search for a previously initialized per-Agg struct + * + * Searches the previously looked at aggregates to find one which is compatible + * with this one, with the same input parameters. If no compatible aggregate + * can be found, returns -1. + * + * As a side-effect, this also collects a list of existing, shareable per-Trans + * structs with matching inputs. If no identical Aggref is found, the list is + * passed later to find_compatible_trans, to see if we can at least reuse + * the state value of another aggregate. + */ +static int +find_compatible_agg(PlannerInfo *root, Aggref *newagg, + List **same_input_transnos) +{ + ListCell *lc; + int aggno; + + *same_input_transnos = NIL; + + /* we mustn't reuse the aggref if it contains volatile function calls */ + if (contain_volatile_functions((Node *) newagg)) + return -1; + + /* + * Search through the list of already seen aggregates. If we find an + * existing identical aggregate call, then we can re-use that one. While + * searching, we'll also collect a list of Aggrefs with the same input + * parameters. If no matching Aggref is found, the caller can potentially + * still re-use the transition state of one of them. (At this stage we + * just compare the parsetrees; whether different aggregates share the + * same transition function will be checked later.) + */ + aggno = -1; + foreach(lc, root->agginfos) + { + AggInfo *agginfo = (AggInfo *) lfirst(lc); + Aggref *existingRef; + + aggno++; + + existingRef = agginfo->representative_aggref; + + /* all of the following must be the same or it's no match */ + if (newagg->inputcollid != existingRef->inputcollid || + newagg->aggtranstype != existingRef->aggtranstype || + newagg->aggstar != existingRef->aggstar || + newagg->aggvariadic != existingRef->aggvariadic || + newagg->aggkind != existingRef->aggkind || + !equal(newagg->args, existingRef->args) || + !equal(newagg->aggorder, existingRef->aggorder) || + !equal(newagg->aggdistinct, existingRef->aggdistinct) || + !equal(newagg->aggfilter, existingRef->aggfilter)) + continue; + + /* if it's the same aggregate function then report exact match */ + if (newagg->aggfnoid == existingRef->aggfnoid && + newagg->aggtype == existingRef->aggtype && + newagg->aggcollid == existingRef->aggcollid && + equal(newagg->aggdirectargs, existingRef->aggdirectargs)) + { + list_free(*same_input_transnos); + *same_input_transnos = NIL; + return aggno; + } + + /* + * Not identical, but it had the same inputs. If the final function + * permits sharing, return its transno to the caller, in case we can + * re-use its per-trans state. (If there's already sharing going on, + * we might report a transno more than once. find_compatible_trans is + * cheap enough that it's not worth spending cycles to avoid that.) + */ + if (agginfo->shareable) + *same_input_transnos = lappend_int(*same_input_transnos, + agginfo->transno); + } + + return -1; +} + +/* + * find_compatible_trans - search for a previously initialized per-Trans + * struct + * + * Searches the list of transnos for a per-Trans struct with the same + * transition function and initial condition. (The inputs have already been + * verified to match.) + */ +static int +find_compatible_trans(PlannerInfo *root, Aggref *newagg, bool shareable, + Oid aggtransfn, Oid aggtranstype, + int transtypeLen, bool transtypeByVal, + Oid aggcombinefn, + Oid aggserialfn, Oid aggdeserialfn, + Datum initValue, bool initValueIsNull, + List *transnos) +{ + ListCell *lc; + + /* If this aggregate can't share transition states, give up */ + if (!shareable) + return -1; + + foreach(lc, transnos) + { + int transno = lfirst_int(lc); + AggTransInfo *pertrans = (AggTransInfo *) list_nth(root->aggtransinfos, transno); + + /* + * if the transfns or transition state types are not the same then the + * state can't be shared. + */ + if (aggtransfn != pertrans->transfn_oid || + aggtranstype != pertrans->aggtranstype) + continue; + + /* + * The serialization and deserialization functions must match, if + * present, as we're unable to share the trans state for aggregates + * which will serialize or deserialize into different formats. + * Remember that these will be InvalidOid if they're not required for + * this agg node. + */ + if (aggserialfn != pertrans->serialfn_oid || + aggdeserialfn != pertrans->deserialfn_oid) + continue; + + /* + * Combine function must also match. We only care about the combine + * function with partial aggregates, but it's too early in the + * planning to know if we will do partial aggregation, so be + * conservative. + */ + if (aggcombinefn != pertrans->combinefn_oid) + continue; + + /* + * Check that the initial condition matches, too. + */ + if (initValueIsNull && pertrans->initValueIsNull) + return transno; + + if (!initValueIsNull && !pertrans->initValueIsNull && + datumIsEqual(initValue, pertrans->initValue, + transtypeByVal, transtypeLen)) + return transno; + } + return -1; +} + +static Datum +GetAggInitVal(Datum textInitVal, Oid transtype) +{ + Oid typinput, + typioparam; + char *strInitVal; + Datum initVal; + + getTypeInputInfo(transtype, &typinput, &typioparam); + strInitVal = TextDatumGetCString(textInitVal); + initVal = OidInputFunctionCall(typinput, strInitVal, + typioparam, -1); + pfree(strInitVal); + return initVal; +} + + +/* + * get_agg_clause_costs + * Recursively find the Aggref nodes in an expression tree, and + * accumulate cost information about them. + * + * 'aggsplit' tells us the expected partial-aggregation mode, which affects + * the cost estimates. + * + * NOTE that the counts/costs are ADDED to those already in *costs ... so + * the caller is responsible for zeroing the struct initially. + * + * We count the nodes, estimate their execution costs, and estimate the total + * space needed for their transition state values if all are evaluated in + * parallel (as would be done in a HashAgg plan). Also, we check whether + * partial aggregation is feasible. See AggClauseCosts for the exact set + * of statistics collected. + * + * In addition, we mark Aggref nodes with the correct aggtranstype, so + * that that doesn't need to be done repeatedly. (That makes this function's + * name a bit of a misnomer.) + * + * This does not descend into subqueries, and so should be used only after + * reduction of sublinks to subplans, or in contexts where it's known there + * are no subqueries. There mustn't be outer-aggregate references either. + */ +void +get_agg_clause_costs(PlannerInfo *root, AggSplit aggsplit, AggClauseCosts *costs) +{ + ListCell *lc; + + foreach(lc, root->aggtransinfos) + { + AggTransInfo *transinfo = (AggTransInfo *) lfirst(lc); + + /* + * Add the appropriate component function execution costs to + * appropriate totals. + */ + if (DO_AGGSPLIT_COMBINE(aggsplit)) + { + /* charge for combining previously aggregated states */ + add_function_cost(root, transinfo->combinefn_oid, NULL, + &costs->transCost); + } + else + add_function_cost(root, transinfo->transfn_oid, NULL, + &costs->transCost); + if (DO_AGGSPLIT_DESERIALIZE(aggsplit) && + OidIsValid(transinfo->deserialfn_oid)) + add_function_cost(root, transinfo->deserialfn_oid, NULL, + &costs->transCost); + if (DO_AGGSPLIT_SERIALIZE(aggsplit) && + OidIsValid(transinfo->serialfn_oid)) + add_function_cost(root, transinfo->serialfn_oid, NULL, + &costs->finalCost); + + /* + * These costs are incurred only by the initial aggregate node, so we + * mustn't include them again at upper levels. + */ + if (!DO_AGGSPLIT_COMBINE(aggsplit)) + { + /* add the input expressions' cost to per-input-row costs */ + QualCost argcosts; + + cost_qual_eval_node(&argcosts, (Node *) transinfo->args, root); + costs->transCost.startup += argcosts.startup; + costs->transCost.per_tuple += argcosts.per_tuple; + + /* + * Add any filter's cost to per-input-row costs. + * + * XXX Ideally we should reduce input expression costs according + * to filter selectivity, but it's not clear it's worth the + * trouble. + */ + if (transinfo->aggfilter) + { + cost_qual_eval_node(&argcosts, (Node *) transinfo->aggfilter, + root); + costs->transCost.startup += argcosts.startup; + costs->transCost.per_tuple += argcosts.per_tuple; + } + } + + /* + * If the transition type is pass-by-value then it doesn't add + * anything to the required size of the hashtable. If it is + * pass-by-reference then we have to add the estimated size of the + * value itself, plus palloc overhead. + */ + if (!transinfo->transtypeByVal) + { + int32 avgwidth; + + /* Use average width if aggregate definition gave one */ + if (transinfo->aggtransspace > 0) + avgwidth = transinfo->aggtransspace; + else if (transinfo->transfn_oid == F_ARRAY_APPEND) + { + /* + * If the transition function is array_append(), it'll use an + * expanded array as transvalue, which will occupy at least + * ALLOCSET_SMALL_INITSIZE and possibly more. Use that as the + * estimate for lack of a better idea. + */ + avgwidth = ALLOCSET_SMALL_INITSIZE; + } + else + { + avgwidth = get_typavgwidth(transinfo->aggtranstype, transinfo->aggtranstypmod); + } + + avgwidth = MAXALIGN(avgwidth); + costs->transitionSpace += avgwidth + 2 * sizeof(void *); + } + else if (transinfo->aggtranstype == INTERNALOID) + { + /* + * INTERNAL transition type is a special case: although INTERNAL + * is pass-by-value, it's almost certainly being used as a pointer + * to some large data structure. The aggregate definition can + * provide an estimate of the size. If it doesn't, then we assume + * ALLOCSET_DEFAULT_INITSIZE, which is a good guess if the data is + * being kept in a private memory context, as is done by + * array_agg() for instance. + */ + if (transinfo->aggtransspace > 0) + costs->transitionSpace += transinfo->aggtransspace; + else + costs->transitionSpace += ALLOCSET_DEFAULT_INITSIZE; + } + } + + foreach(lc, root->agginfos) + { + AggInfo *agginfo = (AggInfo *) lfirst(lc); + Aggref *aggref = agginfo->representative_aggref; + + /* + * Add the appropriate component function execution costs to + * appropriate totals. + */ + if (!DO_AGGSPLIT_SKIPFINAL(aggsplit) && + OidIsValid(agginfo->finalfn_oid)) + add_function_cost(root, agginfo->finalfn_oid, NULL, + &costs->finalCost); + + /* + * If there are direct arguments, treat their evaluation cost like the + * cost of the finalfn. + */ + if (aggref->aggdirectargs) + { + QualCost argcosts; + + cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs, + root); + costs->finalCost.startup += argcosts.startup; + costs->finalCost.per_tuple += argcosts.per_tuple; + } + } +} diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 85ef873caaf..587d494c34f 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -53,14 +53,6 @@ #include "utils/syscache.h" #include "utils/typcache.h" - -typedef struct -{ - PlannerInfo *root; - AggSplit aggsplit; - AggClauseCosts *costs; -} get_agg_clause_costs_context; - typedef struct { ParamListInfo boundParams; @@ -98,8 +90,6 @@ typedef struct } max_parallel_hazard_context; static bool contain_agg_clause_walker(Node *node, void *context); -static bool get_agg_clause_costs_walker(Node *node, - get_agg_clause_costs_context *context); static bool find_window_functions_walker(Node *node, WindowFuncLists *lists); static bool contain_subplans_walker(Node *node, void *context); static bool contain_mutable_functions_walker(Node *node, void *context); @@ -200,284 +190,6 @@ contain_agg_clause_walker(Node *node, void *context) return expression_tree_walker(node, contain_agg_clause_walker, context); } -/* - * get_agg_clause_costs - * Recursively find the Aggref nodes in an expression tree, and - * accumulate cost information about them. - * - * 'aggsplit' tells us the expected partial-aggregation mode, which affects - * the cost estimates. - * - * NOTE that the counts/costs are ADDED to those already in *costs ... so - * the caller is responsible for zeroing the struct initially. - * - * We count the nodes, estimate their execution costs, and estimate the total - * space needed for their transition state values if all are evaluated in - * parallel (as would be done in a HashAgg plan). Also, we check whether - * partial aggregation is feasible. See AggClauseCosts for the exact set - * of statistics collected. - * - * In addition, we mark Aggref nodes with the correct aggtranstype, so - * that that doesn't need to be done repeatedly. (That makes this function's - * name a bit of a misnomer.) - * - * This does not descend into subqueries, and so should be used only after - * reduction of sublinks to subplans, or in contexts where it's known there - * are no subqueries. There mustn't be outer-aggregate references either. - */ -void -get_agg_clause_costs(PlannerInfo *root, Node *clause, AggSplit aggsplit, - AggClauseCosts *costs) -{ - get_agg_clause_costs_context context; - - context.root = root; - context.aggsplit = aggsplit; - context.costs = costs; - (void) get_agg_clause_costs_walker(clause, &context); -} - -static bool -get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context) -{ - if (node == NULL) - return false; - if (IsA(node, Aggref)) - { - Aggref *aggref = (Aggref *) node; - AggClauseCosts *costs = context->costs; - HeapTuple aggTuple; - Form_pg_aggregate aggform; - Oid aggtransfn; - Oid aggfinalfn; - Oid aggcombinefn; - Oid aggserialfn; - Oid aggdeserialfn; - Oid aggtranstype; - int32 aggtransspace; - QualCost argcosts; - - Assert(aggref->agglevelsup == 0); - - /* - * Fetch info about aggregate from pg_aggregate. Note it's correct to - * ignore the moving-aggregate variant, since what we're concerned - * with here is aggregates not window functions. - */ - aggTuple = SearchSysCache1(AGGFNOID, - ObjectIdGetDatum(aggref->aggfnoid)); - if (!HeapTupleIsValid(aggTuple)) - elog(ERROR, "cache lookup failed for aggregate %u", - aggref->aggfnoid); - aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); - aggtransfn = aggform->aggtransfn; - aggfinalfn = aggform->aggfinalfn; - aggcombinefn = aggform->aggcombinefn; - aggserialfn = aggform->aggserialfn; - aggdeserialfn = aggform->aggdeserialfn; - aggtranstype = aggform->aggtranstype; - aggtransspace = aggform->aggtransspace; - ReleaseSysCache(aggTuple); - - /* - * Resolve the possibly-polymorphic aggregate transition type, unless - * already done in a previous pass over the expression. - */ - if (OidIsValid(aggref->aggtranstype)) - aggtranstype = aggref->aggtranstype; - else - { - Oid inputTypes[FUNC_MAX_ARGS]; - int numArguments; - - /* extract argument types (ignoring any ORDER BY expressions) */ - numArguments = get_aggregate_argtypes(aggref, inputTypes); - - /* resolve actual type of transition state, if polymorphic */ - aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid, - aggtranstype, - inputTypes, - numArguments); - aggref->aggtranstype = aggtranstype; - } - - /* - * Count it, and check for cases requiring ordered input. Note that - * ordered-set aggs always have nonempty aggorder. Any ordered-input - * case also defeats partial aggregation. - */ - costs->numAggs++; - if (aggref->aggorder != NIL || aggref->aggdistinct != NIL) - { - costs->numOrderedAggs++; - costs->hasNonPartial = true; - } - - /* - * Check whether partial aggregation is feasible, unless we already - * found out that we can't do it. - */ - if (!costs->hasNonPartial) - { - /* - * If there is no combine function, then partial aggregation is - * not possible. - */ - if (!OidIsValid(aggcombinefn)) - costs->hasNonPartial = true; - - /* - * If we have any aggs with transtype INTERNAL then we must check - * whether they have serialization/deserialization functions; if - * not, we can't serialize partial-aggregation results. - */ - else if (aggtranstype == INTERNALOID && - (!OidIsValid(aggserialfn) || !OidIsValid(aggdeserialfn))) - costs->hasNonSerial = true; - } - - /* - * Add the appropriate component function execution costs to - * appropriate totals. - */ - if (DO_AGGSPLIT_COMBINE(context->aggsplit)) - { - /* charge for combining previously aggregated states */ - add_function_cost(context->root, aggcombinefn, NULL, - &costs->transCost); - } - else - add_function_cost(context->root, aggtransfn, NULL, - &costs->transCost); - if (DO_AGGSPLIT_DESERIALIZE(context->aggsplit) && - OidIsValid(aggdeserialfn)) - add_function_cost(context->root, aggdeserialfn, NULL, - &costs->transCost); - if (DO_AGGSPLIT_SERIALIZE(context->aggsplit) && - OidIsValid(aggserialfn)) - add_function_cost(context->root, aggserialfn, NULL, - &costs->finalCost); - if (!DO_AGGSPLIT_SKIPFINAL(context->aggsplit) && - OidIsValid(aggfinalfn)) - add_function_cost(context->root, aggfinalfn, NULL, - &costs->finalCost); - - /* - * These costs are incurred only by the initial aggregate node, so we - * mustn't include them again at upper levels. - */ - if (!DO_AGGSPLIT_COMBINE(context->aggsplit)) - { - /* add the input expressions' cost to per-input-row costs */ - cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root); - costs->transCost.startup += argcosts.startup; - costs->transCost.per_tuple += argcosts.per_tuple; - - /* - * Add any filter's cost to per-input-row costs. - * - * XXX Ideally we should reduce input expression costs according - * to filter selectivity, but it's not clear it's worth the - * trouble. - */ - if (aggref->aggfilter) - { - cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter, - context->root); - costs->transCost.startup += argcosts.startup; - costs->transCost.per_tuple += argcosts.per_tuple; - } - } - - /* - * If there are direct arguments, treat their evaluation cost like the - * cost of the finalfn. - */ - if (aggref->aggdirectargs) - { - cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs, - context->root); - costs->finalCost.startup += argcosts.startup; - costs->finalCost.per_tuple += argcosts.per_tuple; - } - - /* - * If the transition type is pass-by-value then it doesn't add - * anything to the required size of the hashtable. If it is - * pass-by-reference then we have to add the estimated size of the - * value itself, plus palloc overhead. - */ - if (!get_typbyval(aggtranstype)) - { - int32 avgwidth; - - /* Use average width if aggregate definition gave one */ - if (aggtransspace > 0) - avgwidth = aggtransspace; - else if (aggtransfn == F_ARRAY_APPEND) - { - /* - * If the transition function is array_append(), it'll use an - * expanded array as transvalue, which will occupy at least - * ALLOCSET_SMALL_INITSIZE and possibly more. Use that as the - * estimate for lack of a better idea. - */ - avgwidth = ALLOCSET_SMALL_INITSIZE; - } - else - { - /* - * If transition state is of same type as first aggregated - * input, assume it's the same typmod (same width) as well. - * This works for cases like MAX/MIN and is probably somewhat - * reasonable otherwise. - */ - int32 aggtranstypmod = -1; - - if (aggref->args) - { - TargetEntry *tle = (TargetEntry *) linitial(aggref->args); - - if (aggtranstype == exprType((Node *) tle->expr)) - aggtranstypmod = exprTypmod((Node *) tle->expr); - } - - avgwidth = get_typavgwidth(aggtranstype, aggtranstypmod); - } - - avgwidth = MAXALIGN(avgwidth); - costs->transitionSpace += avgwidth + 2 * sizeof(void *); - } - else if (aggtranstype == INTERNALOID) - { - /* - * INTERNAL transition type is a special case: although INTERNAL - * is pass-by-value, it's almost certainly being used as a pointer - * to some large data structure. The aggregate definition can - * provide an estimate of the size. If it doesn't, then we assume - * ALLOCSET_DEFAULT_INITSIZE, which is a good guess if the data is - * being kept in a private memory context, as is done by - * array_agg() for instance. - */ - if (aggtransspace > 0) - costs->transitionSpace += aggtransspace; - else - costs->transitionSpace += ALLOCSET_DEFAULT_INITSIZE; - } - - /* - * We assume that the parser checked that there are no aggregates (of - * this level anyway) in the aggregated arguments, direct arguments, - * or filter clause. Hence, we need not recurse into any of them. - */ - return false; - } - Assert(!IsA(node, SubLink)); - return expression_tree_walker(node, get_agg_clause_costs_walker, - (void *) context); -} - - /***************************************************************************** * Window-function clause manipulation *****************************************************************************/ |