summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2016-08-24 14:37:51 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2016-08-24 14:37:51 -0400
commitaaad96e4026e7bb3033ac0127510f2c60116a1b3 (patch)
treeec5c92a09b80f6282f50a2ce30033b54fd640137 /src
parente8aed974b0e2311c586b9e300a459e473e9a3271 (diff)
Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
Diffstat (limited to 'src')
-rw-r--r--src/backend/executor/nodeAgg.c15
-rw-r--r--src/backend/nodes/copyfuncs.c1
-rw-r--r--src/backend/nodes/outfuncs.c1
-rw-r--r--src/backend/optimizer/plan/createplan.c1
-rw-r--r--src/backend/optimizer/plan/subselect.c47
-rw-r--r--src/include/nodes/plannodes.h2
-rw-r--r--src/test/regress/expected/aggregates.out73
-rw-r--r--src/test/regress/sql/aggregates.sql22
8 files changed, 155 insertions, 7 deletions
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 6fd67677b39..82e4e8f63e7 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1914,13 +1914,14 @@ void
ExecReScanAgg(AggState *node)
{
ExprContext *econtext = node->ss.ps.ps_ExprContext;
+ Agg *aggnode = (Agg *) node->ss.ps.plan;
int aggno;
node->agg_done = false;
node->ss.ps.ps_TupFromTlist = false;
- if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
+ if (aggnode->aggstrategy == AGG_HASHED)
{
/*
* In the hashed case, if we haven't yet built the hash table then we
@@ -1932,11 +1933,13 @@ ExecReScanAgg(AggState *node)
return;
/*
- * If we do have the hash table and the subplan does not have any
- * parameter changes, then we can just rescan the existing hash table;
- * no need to build it again.
+ * If we do have the hash table, and the subplan does not have any
+ * parameter changes, and none of our own parameter changes affect
+ * input expressions of the aggregated functions, then we can just
+ * rescan the existing hash table; no need to build it again.
*/
- if (node->ss.ps.lefttree->chgParam == NULL)
+ if (node->ss.ps.lefttree->chgParam == NULL &&
+ !bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
{
ResetTupleHashIterator(node->hashtable, &node->hashiter);
return;
@@ -1973,7 +1976,7 @@ ExecReScanAgg(AggState *node)
*/
MemoryContextResetAndDeleteChildren(node->aggcontext);
- if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
+ if (aggnode->aggstrategy == AGG_HASHED)
{
/* Rebuild an empty hash table */
build_hash_table(node);
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 4f7a7d00661..0d7f337c950 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -781,6 +781,7 @@ _copyAgg(const Agg *from)
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(numGroups);
+ COPY_BITMAPSET_FIELD(aggParams);
return newnode;
}
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 5b0bc4d4cc1..40aa5fbd66f 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -645,6 +645,7 @@ _outAgg(StringInfo str, const Agg *node)
appendStringInfo(str, " %u", node->grpOperators[i]);
WRITE_LONG_FIELD(numGroups);
+ WRITE_BITMAPSET_FIELD(aggParams);
}
static void
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 38cc2ca2de6..8d656aeebea 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -4311,6 +4311,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
node->grpColIdx = grpColIdx;
node->grpOperators = grpOperators;
node->numGroups = numGroups;
+ node->aggParams = NULL; /* SS_finalize_plan() will fill this */
copy_plan_costsize(plan, lefttree); /* only care about copying size */
cost_agg(&agg_path, root,
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index b9255abcb6c..18f75eb4dcd 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -80,6 +80,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
Bitmapset *valid_params,
Bitmapset *scan_params);
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
+static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
/*
@@ -2356,6 +2357,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
locally_added_param);
break;
+ case T_Agg:
+ {
+ Agg *agg = (Agg *) plan;
+
+ /*
+ * AGG_HASHED plans need to know which Params are referenced
+ * in aggregate calls. Do a separate scan to identify them.
+ */
+ if (agg->aggstrategy == AGG_HASHED)
+ {
+ finalize_primnode_context aggcontext;
+
+ aggcontext.root = root;
+ aggcontext.paramids = NULL;
+ finalize_agg_primnode((Node *) agg->plan.targetlist,
+ &aggcontext);
+ finalize_agg_primnode((Node *) agg->plan.qual,
+ &aggcontext);
+ agg->aggParams = aggcontext.paramids;
+ }
+ }
+ break;
+
case T_WindowAgg:
finalize_primnode(((WindowAgg *) plan)->startOffset,
&context);
@@ -2364,7 +2388,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
break;
case T_Hash:
- case T_Agg:
case T_Material:
case T_Sort:
case T_Unique:
@@ -2509,6 +2532,28 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
}
/*
+ * finalize_agg_primnode: find all Aggref nodes in the given expression tree,
+ * and add IDs of all PARAM_EXEC params appearing within their aggregated
+ * arguments to the result set.
+ */
+static bool
+finalize_agg_primnode(Node *node, finalize_primnode_context *context)
+{
+ if (node == NULL)
+ return false;
+ if (IsA(node, Aggref))
+ {
+ Aggref *agg = (Aggref *) node;
+
+ /* we should not consider the direct arguments, if any */
+ finalize_primnode((Node *) agg->args, context);
+ return false; /* there can't be any Aggrefs below here */
+ }
+ return expression_tree_walker(node, finalize_agg_primnode,
+ (void *) context);
+}
+
+/*
* SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
*
* The plan is expected to return a scalar value of the given type/collation.
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 98ca192209b..c5b38df14fe 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -633,6 +633,8 @@ typedef struct Agg
AttrNumber *grpColIdx; /* their indexes in the target list */
Oid *grpOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
+ Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
+ /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
} Agg;
/* ----------------
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index d35cc669f7e..99be5bb2355 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -305,6 +305,79 @@ from tenk1 o;
9999
(1 row)
+-- Test handling of Params within aggregate arguments in hashed aggregation.
+-- Per bug report from Jeevan Chalke.
+explain (verbose, costs off)
+select s1, s2, sm
+from generate_series(1, 3) s1,
+ lateral (select s2, sum(s1 + s2) sm
+ from generate_series(1, 3) s2 group by s2) ss
+order by 1, 2;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
+ Sort Key: s1.s1, s2.s2
+ -> Nested Loop
+ Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
+ -> Function Scan on pg_catalog.generate_series s1
+ Output: s1.s1
+ Function Call: generate_series(1, 3)
+ -> HashAggregate
+ Output: s2.s2, sum((s1.s1 + s2.s2))
+ -> Function Scan on pg_catalog.generate_series s2
+ Output: s2.s2
+ Function Call: generate_series(1, 3)
+(13 rows)
+
+select s1, s2, sm
+from generate_series(1, 3) s1,
+ lateral (select s2, sum(s1 + s2) sm
+ from generate_series(1, 3) s2 group by s2) ss
+order by 1, 2;
+ s1 | s2 | sm
+----+----+----
+ 1 | 1 | 2
+ 1 | 2 | 3
+ 1 | 3 | 4
+ 2 | 1 | 3
+ 2 | 2 | 4
+ 2 | 3 | 5
+ 3 | 1 | 4
+ 3 | 2 | 5
+ 3 | 3 | 6
+(9 rows)
+
+explain (verbose, costs off)
+select array(select sum(x+y) s
+ from generate_series(1,3) y group by y order by s)
+ from generate_series(1,3) x;
+ QUERY PLAN
+-------------------------------------------------------------------
+ Function Scan on pg_catalog.generate_series x
+ Output: (SubPlan 1)
+ Function Call: generate_series(1, 3)
+ SubPlan 1
+ -> Sort
+ Output: (sum((x.x + y.y))), y.y
+ Sort Key: (sum((x.x + y.y)))
+ -> HashAggregate
+ Output: sum((x.x + y.y)), y.y
+ -> Function Scan on pg_catalog.generate_series y
+ Output: y.y
+ Function Call: generate_series(1, 3)
+(12 rows)
+
+select array(select sum(x+y) s
+ from generate_series(1,3) y group by y order by s)
+ from generate_series(1,3) x;
+ array
+---------
+ {2,3,4}
+ {3,4,5}
+ {4,5,6}
+(3 rows)
+
--
-- test for bitwise integer aggregates
--
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index fb47169c0fd..9bd5233f9c9 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -86,6 +86,28 @@ select
(select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
from tenk1 o;
+-- Test handling of Params within aggregate arguments in hashed aggregation.
+-- Per bug report from Jeevan Chalke.
+explain (verbose, costs off)
+select s1, s2, sm
+from generate_series(1, 3) s1,
+ lateral (select s2, sum(s1 + s2) sm
+ from generate_series(1, 3) s2 group by s2) ss
+order by 1, 2;
+select s1, s2, sm
+from generate_series(1, 3) s1,
+ lateral (select s2, sum(s1 + s2) sm
+ from generate_series(1, 3) s2 group by s2) ss
+order by 1, 2;
+
+explain (verbose, costs off)
+select array(select sum(x+y) s
+ from generate_series(1,3) y group by y order by s)
+ from generate_series(1,3) x;
+select array(select sum(x+y) s
+ from generate_series(1,3) y group by y order by s)
+ from generate_series(1,3) x;
+
--
-- test for bitwise integer aggregates
--