summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/util/clauses.c
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2017-01-18 12:46:50 -0800
committerAndres Freund <andres@anarazel.de>2017-01-18 13:40:27 -0800
commit69f4b9c85f168ae006929eec44fc44d569e846b9 (patch)
tree0a7fa27d1be6e341d14f067d4942c1c0e0964729 /src/backend/optimizer/util/clauses.c
parente37360d5df240443bb6e997d26d54f59146283fc (diff)
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs_ in the targetlist (like SELECT generate_series(1,5)) so far was done in the expression evaluation (i.e. ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code. This meant that most executor nodes performing projection, and most expression evaluation functions, had to deal with the possibility that an evaluated expression could return a set of return values. That's bad because it leads to repeated code in a lot of places. It also, and that's my (Andres's) motivation, made it a lot harder to implement a more efficient way of doing expression evaluation. To fix this, introduce a new executor node (ProjectSet) that can evaluate targetlists containing one or more SRFs. To avoid the complexity of the old way of handling nested expressions returning sets (e.g. having to pass up ExprDoneCond, and dealing with arguments to functions returning sets etc.), those SRFs can only be at the top level of the node's targetlist. The planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is only necessary in ProjectSet nodes and that SRFs are only present at the top level of the node's targetlist. If there are nested SRFs the planner creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get input from an underlying node. We also discussed and prototyped evaluating targetlist SRFs using ROWS FROM(), but that turned out to be more complicated than we'd hoped. While moving SRF evaluation to ProjectSet would allow to retain the old "least common multiple" behavior when multiple SRFs are present in one targetlist (i.e. continue returning rows until all SRFs are at the end of their input at the same time), we decided to instead only return rows till all SRFs are exhausted, returning NULL for already exhausted ones. We deemed the previous behavior to be too confusing, unexpected and actually not particularly useful. As a side effect, the previously prohibited case of multiple set returning arguments to a function, is now allowed. Not because it's particularly desirable, but because it ends up working and there seems to be no argument for adding code to prohibit it. Currently the behavior for COALESCE and CASE containing SRFs has changed, returning multiple rows from the expression, even when the SRF containing "arm" of the expression is not evaluated. That's because the SRFs are evaluated in a separate ProjectSet node. As that's quite confusing, we're likely to instead prohibit SRFs in those places. But that's still being discussed, and the code would reside in places not touched here, so that's a task for later. There's a lot of, now superfluous, code dealing with set return expressions around. But as the changes to get rid of those are verbose largely boring, it seems better for readability to keep the cleanup as a separate commit. Author: Tom Lane and Andres Freund Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
Diffstat (limited to 'src/backend/optimizer/util/clauses.c')
-rw-r--r--src/backend/optimizer/util/clauses.c104
1 files changed, 13 insertions, 91 deletions
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 9e122e383d8..85ffa3afc7c 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -99,7 +99,6 @@ static bool contain_agg_clause_walker(Node *node, void *context);
static bool get_agg_clause_costs_walker(Node *node,
get_agg_clause_costs_context *context);
static bool find_window_functions_walker(Node *node, WindowFuncLists *lists);
-static bool expression_returns_set_rows_walker(Node *node, double *count);
static bool contain_subplans_walker(Node *node, void *context);
static bool contain_mutable_functions_walker(Node *node, void *context);
static bool contain_volatile_functions_walker(Node *node, void *context);
@@ -790,114 +789,37 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists)
/*
* expression_returns_set_rows
* Estimate the number of rows returned by a set-returning expression.
- * The result is 1 if there are no set-returning functions.
+ * The result is 1 if it's not a set-returning expression.
*
- * We use the product of the rowcount estimates of all the functions in
- * the given tree (this corresponds to the behavior of ExecMakeFunctionResult
- * for nested set-returning functions).
+ * We should only examine the top-level function or operator; it used to be
+ * appropriate to recurse, but not anymore. (Even if there are more SRFs in
+ * the function's inputs, their multipliers are accounted for separately.)
*
* Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c.
*/
double
expression_returns_set_rows(Node *clause)
{
- double result = 1;
-
- (void) expression_returns_set_rows_walker(clause, &result);
- return clamp_row_est(result);
-}
-
-static bool
-expression_returns_set_rows_walker(Node *node, double *count)
-{
- if (node == NULL)
- return false;
- if (IsA(node, FuncExpr))
+ if (clause == NULL)
+ return 1.0;
+ if (IsA(clause, FuncExpr))
{
- FuncExpr *expr = (FuncExpr *) node;
+ FuncExpr *expr = (FuncExpr *) clause;
if (expr->funcretset)
- *count *= get_func_rows(expr->funcid);
+ return clamp_row_est(get_func_rows(expr->funcid));
}
- if (IsA(node, OpExpr))
+ if (IsA(clause, OpExpr))
{
- OpExpr *expr = (OpExpr *) node;
+ OpExpr *expr = (OpExpr *) clause;
if (expr->opretset)
{
set_opfuncid(expr);
- *count *= get_func_rows(expr->opfuncid);
+ return clamp_row_est(get_func_rows(expr->opfuncid));
}
}
-
- /* Avoid recursion for some cases that can't return a set */
- if (IsA(node, Aggref))
- return false;
- if (IsA(node, WindowFunc))
- return false;
- if (IsA(node, DistinctExpr))
- return false;
- if (IsA(node, NullIfExpr))
- return false;
- if (IsA(node, ScalarArrayOpExpr))
- return false;
- if (IsA(node, BoolExpr))
- return false;
- if (IsA(node, SubLink))
- return false;
- if (IsA(node, SubPlan))
- return false;
- if (IsA(node, AlternativeSubPlan))
- return false;
- if (IsA(node, ArrayExpr))
- return false;
- if (IsA(node, RowExpr))
- return false;
- if (IsA(node, RowCompareExpr))
- return false;
- if (IsA(node, CoalesceExpr))
- return false;
- if (IsA(node, MinMaxExpr))
- return false;
- if (IsA(node, XmlExpr))
- return false;
-
- return expression_tree_walker(node, expression_returns_set_rows_walker,
- (void *) count);
-}
-
-/*
- * tlist_returns_set_rows
- * Estimate the number of rows returned by a set-returning targetlist.
- * The result is 1 if there are no set-returning functions.
- *
- * Here, the result is the largest rowcount estimate of any of the tlist's
- * expressions, not the product as you would get from naively applying
- * expression_returns_set_rows() to the whole tlist. The behavior actually
- * implemented by ExecTargetList produces a number of rows equal to the least
- * common multiple of the expression rowcounts, so that the product would be
- * a worst-case estimate that is typically not realistic. Taking the max as
- * we do here is a best-case estimate that might not be realistic either,
- * but it's probably closer for typical usages. We don't try to compute the
- * actual LCM because we're working with very approximate estimates, so their
- * LCM would be unduly noisy.
- */
-double
-tlist_returns_set_rows(List *tlist)
-{
- double result = 1;
- ListCell *lc;
-
- foreach(lc, tlist)
- {
- TargetEntry *tle = (TargetEntry *) lfirst(lc);
- double colresult;
-
- colresult = expression_returns_set_rows((Node *) tle->expr);
- if (result < colresult)
- result = colresult;
- }
- return result;
+ return 1.0;
}