diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 80 | ||||
-rw-r--r-- | src/test/regress/expected/aggregates.out | 20 | ||||
-rw-r--r-- | src/test/regress/expected/join.out | 80 | ||||
-rw-r--r-- | src/test/regress/sql/aggregates.sql | 9 | ||||
-rw-r--r-- | src/test/regress/sql/join.sql | 33 |
5 files changed, 179 insertions, 43 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 65f17101591..a8c8edfac75 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8284,8 +8284,8 @@ generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist) * the needs of the semijoin represented by sjinfo. If it is not possible * to identify how to make the data unique, NULL is returned. * - * If used at all, this is likely to be called repeatedly on the same rel; - * So we cache the result. + * If used at all, this is likely to be called repeatedly on the same rel, + * so we cache the result. */ RelOptInfo * create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) @@ -8375,9 +8375,14 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) * variables of the input rel's targetlist. We have to add any such * expressions to the unique rel's targetlist. * - * While in the loop, build the lists of SortGroupClause's that - * represent the ordering for the sort-based implementation and the - * grouping for the hash-based implementation. + * To complicate matters, some of the values to be unique-ified may be + * known redundant by the EquivalenceClass machinery (e.g., because + * they have been equated to constants). There is no need to compare + * such values during unique-ification, and indeed we had better not + * try because the Vars involved may not have propagated as high as + * the semijoin's level. We use make_pathkeys_for_sortclauses to + * detect such cases, which is a tad inefficient but it doesn't seem + * worth building specialized infrastructure for this. */ newtlist = make_tlist_from_pathtarget(rel->reltarget); nextresno = list_length(newtlist) + 1; @@ -8386,8 +8391,9 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) { Expr *uniqexpr = lfirst(lc1); Oid in_oper = lfirst_oid(lc2); - Oid sortop = InvalidOid; + Oid sortop; TargetEntry *tle; + bool made_tle = false; tle = tlist_member(uniqexpr, newtlist); if (!tle) @@ -8398,19 +8404,21 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) false); newtlist = lappend(newtlist, tle); nextresno++; + made_tle = true; } - if (sjinfo->semi_can_btree) + /* + * Try to build an ORDER BY list to sort the input compatibly. We + * do this for each sortable clause even when the clauses are not + * all sortable, so that we can detect clauses that are redundant + * according to the pathkey machinery. + */ + sortop = get_ordering_op_for_equality_op(in_oper, false); + if (OidIsValid(sortop)) { - /* Create an ORDER BY list to sort the input compatibly */ Oid eqop; SortGroupClause *sortcl; - sortop = get_ordering_op_for_equality_op(in_oper, false); - if (!OidIsValid(sortop)) /* shouldn't happen */ - elog(ERROR, "could not find ordering operator for equality operator %u", - in_oper); - /* * The Unique node will need equality operators. Normally * these are the same as the IN clause operators, but if those @@ -8430,7 +8438,32 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) sortcl->nulls_first = false; sortcl->hashable = false; /* no need to make this accurate */ sortList = lappend(sortList, sortcl); + + /* + * At each step, convert the SortGroupClause list to pathkey + * form. If the just-added SortGroupClause is redundant, the + * result will be shorter than the SortGroupClause list. + */ + sortPathkeys = make_pathkeys_for_sortclauses(root, sortList, + newtlist); + if (list_length(sortPathkeys) != list_length(sortList)) + { + /* Drop the redundant SortGroupClause */ + sortList = list_delete_last(sortList); + Assert(list_length(sortPathkeys) == list_length(sortList)); + /* Undo tlist addition, if we made one */ + if (made_tle) + { + newtlist = list_delete_last(newtlist); + nextresno--; + } + /* We need not consider this clause for hashing, either */ + continue; + } } + else if (sjinfo->semi_can_btree) /* shouldn't happen */ + elog(ERROR, "could not find ordering operator for equality operator %u", + in_oper); if (sjinfo->semi_can_hash) { @@ -8460,8 +8493,27 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) } } + /* + * Done building the sortPathkeys and groupClause. But the + * sortPathkeys are bogus if not all the clauses were sortable. + */ + if (!sjinfo->semi_can_btree) + sortPathkeys = NIL; + + /* + * It can happen that all the RHS columns are equated to constants. + * We'd have to do something special to unique-ify in that case, and + * it's such an unlikely-in-the-real-world case that it's not worth + * the effort. So just punt if we found no columns to unique-ify. + */ + if (sortPathkeys == NIL && groupClause == NIL) + { + MemoryContextSwitchTo(oldcontext); + return NULL; + } + + /* Convert the required targetlist back to PathTarget form */ unique_rel->reltarget = create_pathtarget(root, newtlist); - sortPathkeys = make_pathkeys_for_sortclauses(root, sortList, newtlist); } /* build unique paths based on input rel's pathlist */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 7319945ffe3..c35288eecde 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -3398,26 +3398,6 @@ select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*) ba | 0 | 1 (2 rows) --- Make sure that generation of HashAggregate for uniqification purposes --- does not lead to array overflow due to unexpected duplicate hash keys --- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com -set enable_memoize to off; -explain (costs off) - select 1 from tenk1 - where (hundred, thousand) in (select twothousand, twothousand from onek); - QUERY PLAN -------------------------------------------------------------- - Hash Join - Hash Cond: (tenk1.hundred = onek.twothousand) - -> Seq Scan on tenk1 - Filter: (hundred = thousand) - -> Hash - -> HashAggregate - Group Key: onek.twothousand, onek.twothousand - -> Seq Scan on onek -(8 rows) - -reset enable_memoize; -- -- Hash Aggregation Spill tests -- diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 98b05c94a11..b26b8c5bdbe 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3222,6 +3222,24 @@ where b.unique2 is null; -> Index Only Scan using tenk1_unique2 on tenk1 b (5 rows) +-- check that we avoid de-duplicating columns redundantly +set enable_memoize to off; +explain (costs off) +select 1 from tenk1 +where (hundred, thousand) in (select twothousand, twothousand from onek); + QUERY PLAN +------------------------------------------------- + Hash Join + Hash Cond: (tenk1.hundred = onek.twothousand) + -> Seq Scan on tenk1 + Filter: (hundred = thousand) + -> Hash + -> HashAggregate + Group Key: onek.twothousand + -> Seq Scan on onek +(8 rows) + +reset enable_memoize; -- -- regression test for bogus RTE_GROUP entries -- @@ -6501,6 +6519,68 @@ where t1.a = s.c; (0 rows) rollback; +-- check handling of semijoins after join removal: we must suppress +-- unique-ification of known-constant values +begin; +create temp table t (a int unique, b int); +insert into t values (1, 2); +explain (verbose, costs off) +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + QUERY PLAN +------------------------------------------------------------------------------------ + Nested Loop + Output: t1.a + Inner Unique: true + -> Nested Loop + Output: t1.a, t5.a + -> Index Only Scan using t_a_key on pg_temp.t t1 + Output: t1.a + Index Cond: (t1.a = 1) + -> HashAggregate + Output: t5.a + Group Key: t5.a + -> Hash Join + Output: t5.a + Hash Cond: (t6.b = t4.b) + -> Seq Scan on pg_temp.t t6 + Output: t6.a, t6.b + -> Hash + Output: t4.b, t5.b, t5.a + -> Hash Join + Output: t4.b, t5.b, t5.a + Inner Unique: true + Hash Cond: (t5.b = t4.b) + -> Seq Scan on pg_temp.t t5 + Output: t5.a, t5.b + -> Hash + Output: t4.b, t4.a + -> Index Scan using t_a_key on pg_temp.t t4 + Output: t4.b, t4.a + Index Cond: (t4.a = 1) + -> Index Only Scan using t_a_key on pg_temp.t t3 + Output: t3.a + Index Cond: (t3.a = t5.a) +(32 rows) + +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + a +--- + 1 +(1 row) + +rollback; -- test cases where we can remove a join, but not a PHV computed at it begin; create temp table t (a int unique, b int); diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index dde85d0dfb2..62540b1ffa4 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1510,15 +1510,6 @@ select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*) from unnest(array['a','b']) u(v) group by v||'a' order by 1; --- Make sure that generation of HashAggregate for uniqification purposes --- does not lead to array overflow due to unexpected duplicate hash keys --- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com -set enable_memoize to off; -explain (costs off) - select 1 from tenk1 - where (hundred, thousand) in (select twothousand, twothousand from onek); -reset enable_memoize; - -- -- Hash Aggregation Spill tests -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 5f0a475894d..bccd171afb6 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -839,6 +839,13 @@ explain (costs off) select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2 where b.unique2 is null; +-- check that we avoid de-duplicating columns redundantly +set enable_memoize to off; +explain (costs off) +select 1 from tenk1 +where (hundred, thousand) in (select twothousand, twothousand from onek); +reset enable_memoize; + -- -- regression test for bogus RTE_GROUP entries -- @@ -2420,6 +2427,32 @@ where t1.a = s.c; rollback; +-- check handling of semijoins after join removal: we must suppress +-- unique-ification of known-constant values +begin; + +create temp table t (a int unique, b int); +insert into t values (1, 2); + +explain (verbose, costs off) +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + +rollback; + -- test cases where we can remove a join, but not a PHV computed at it begin; |