diff options
| field | value | date |
|---|---|---|
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2016-05-26 14:52:24 -0400 |
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2016-05-26 14:52:30 -0400 |
| commit | aeb9ae6457865c8949641d71a9523374d843a418 (patch) | |
| tree | 099574b47620bae45704e7d8c725561bc411ba14 /src | |
| parent | d7ef3572a8a17cd6c495d7593ea94a2cf2c076e3 (diff) | |

Disable physical tlist if any Var would need multiple sortgroupref labels.

As part of upper planner pathification (commit 3fc6e2d7f5b652b4) I redid
createplan.c's approach to the physical-tlist optimization, in which scan
nodes are allowed to return exactly the underlying table's columns so as
to save doing a projection step at runtime.  The logic was intentionally
more aggressive than before about applying the optimization, which is
generally a good thing, but Andres Freund found a case in which it got
too aggressive.  Namely, if any column is referenced more than once in
the parent plan node's sorting or grouping column list, we can't optimize
because then that column would need to have more than one ressortgroupref
label, and we only have space for one.

Add logic to detect this situation in use_physical_tlist(), and also add
some error checking in apply_pathtarget_labeling_to_tlist(), which this
example proves was being overly cavalier about whether what it was doing
made any sense.

The added test case exposes the problem only because we do not eliminate
duplicate grouping keys.  That might be something to fix someday, but it
doesn't seem like appropriate post-beta work.

Report: <20160526021235.w4nq7k3gnheg7vit@alap3.anarazel.de>

Diffstat (limited to 'src')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/backend/optimizer/plan/createplan.c | 15 |
| -rw-r--r-- | src/backend/optimizer/util/tlist.c | 23 |
| -rw-r--r-- | src/test/regress/expected/select_distinct.out | 24 |
| -rw-r--r-- | src/test/regress/sql/select_distinct.sql | 11 |

4 files changed, 65 insertions, 8 deletions

| diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 185f0625a78..bd19f43d586 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -787,10 +787,14 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)  	 * to emit any sort/group columns that are not simple Vars.  (If they are  	 * simple Vars, they should appear in the physical tlist, and  	 * apply_pathtarget_labeling_to_tlist will take care of getting them -	 * labeled again.) +	 * labeled again.)	We also have to check that no two sort/group columns +	 * are the same Var, else that element of the physical tlist would need +	 * conflicting ressortgroupref labels.  	 */  	if ((flags & CP_LABEL_TLIST) && path->pathtarget->sortgrouprefs)  	{ +		Bitmapset  *sortgroupatts = NULL; +  		i = 0;  		foreach(lc, path->pathtarget->exprs)  		{ @@ -799,7 +803,14 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)  			if (path->pathtarget->sortgrouprefs[i])  			{  				if (expr && IsA(expr, Var)) -					 /* okay */ ; +				{ +					int			attno = ((Var *) expr)->varattno; + +					attno -= FirstLowInvalidHeapAttributeNumber; +					if (bms_is_member(attno, sortgroupatts)) +						return false; +					sortgroupatts = bms_add_member(sortgroupatts, attno); +				}  				else  					return false;  			} diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index aa2c2f890c0..94825408b2a 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -736,17 +736,28 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target)  			 * this allows us to deal with some cases where a set-returning  			 * function has been inlined, so that we now have more knowledge  			 * about what it returns than we did when the original Var was -			 * created.  Otherwise, use regular equal() to see if there's a -			 * matching TLE.  
(In current usage, only the Var case is actually -			 * needed; but it seems best to have sane behavior here for -			 * non-Vars too.) +			 * created.  Otherwise, use regular equal() to find the matching +			 * TLE.  (In current usage, only the Var case is actually needed; +			 * but it seems best to have sane behavior here for non-Vars too.)  			 */  			if (expr && IsA(expr, Var))  				tle = tlist_member_match_var((Var *) expr, tlist);  			else  				tle = tlist_member((Node *) expr, tlist); -			if (tle) -				tle->ressortgroupref = target->sortgrouprefs[i]; + +			/* +			 * Complain if noplace for the sortgrouprefs label, or if we'd +			 * have to label a column twice.  (The case where it already has +			 * the desired label probably can't happen, but we may as well +			 * allow for it.) +			 */ +			if (!tle) +				elog(ERROR, "ORDER/GROUP BY expression not found in targetlist"); +			if (tle->ressortgroupref != 0 && +				tle->ressortgroupref != target->sortgrouprefs[i]) +				elog(ERROR, "targetlist item has multiple sortgroupref labels"); + +			tle->ressortgroupref = target->sortgrouprefs[i];  		}  		i++;  	} diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 38107a04133..f3696c6d1de 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -125,6 +125,30 @@ SELECT DISTINCT p.age FROM person* p ORDER BY age using >;  (20 rows)  -- +-- Check mentioning same column more than once +-- +EXPLAIN (VERBOSE, COSTS OFF) +SELECT count(*) FROM +  (SELECT DISTINCT two, four, two FROM tenk1) ss; +                       QUERY PLAN                        +-------------------------------------------------------- + Aggregate +   Output: count(*) +   ->  HashAggregate +         Output: tenk1.two, tenk1.four, tenk1.two +         Group Key: tenk1.two, tenk1.four, tenk1.two +         ->  Seq Scan on public.tenk1 +               Output: tenk1.two, tenk1.four, tenk1.two +(7 
rows) + +SELECT count(*) FROM +  (SELECT DISTINCT two, four, two FROM tenk1) ss; + count  +------- +     4 +(1 row) + +--  -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its  -- very own regression file.  -- diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index 328ba51c7a8..a605e86449e 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -35,6 +35,17 @@ SELECT DISTINCT two, string4, ten  SELECT DISTINCT p.age FROM person* p ORDER BY age using >;  -- +-- Check mentioning same column more than once +-- + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT count(*) FROM +  (SELECT DISTINCT two, four, two FROM tenk1) ss; + +SELECT count(*) FROM +  (SELECT DISTINCT two, four, two FROM tenk1) ss; + +--  -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its  -- very own regression file.  -- | 
