Fix CLUSTER tuplesorts on abbreviated expressions.

CLUSTER sort won't use the datum1 SortTuple field when clustering against an index whose leading key is an expression. This makes it unsafe to use the abbreviated keys optimization, which was missed by the logic that sets up SortSupport state. Affected tuplesorts output tuples in a completely bogus order as a result (the wrong SortSupport based comparator was used for the leading attribute). This issue is similar to the bug fixed on the master branch by recent commit cc58eecc5d. But it's a far older issue, that dates back to the introduction of the abbreviated keys optimization by commit 4ea51cdfe8. Backpatch to all supported versions. Author: Peter Geoghegan <pg@bowt.ie> Author: Thomas Munro <thomas.munro@gmail.com> Discussion: https://postgr.es/m/CA+hUKG+bA+bmwD36_oDxAoLrCwZjVtST2fqe=b4=qZcmU7u89A@mail.gmail.com Backpatch: 10-
author: Peter Geoghegan <pg@bowt.ie> 2022-04-20 17:17:33 -0700
committer: Peter Geoghegan <pg@bowt.ie> 2022-04-20 17:17:33 -0700
commit: a903895b3fe49c4ceb6fbcea73e357493245771d (patch)
tree: b8e745d5635b83cf481cc54b282ad3cfd28ad3ef
parent: a1e4782a0bca3f19f05042c6f05675dbf4fcacc7 (diff)
3 files changed, 167 insertions, 1 deletions
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 80bc67bc9e5..73fea8de01b 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -830,6 +830,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 						int workMem, bool randomAccess)
 {
 	Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess);
+	AttrNumber	leading;
 	ScanKey		indexScanKey;
 	MemoryContext oldcontext;
 	int			i;
@@ -861,6 +862,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 	state->abbrevNext = 10;
 
 	state->indexInfo = BuildIndexInfo(indexRel);
+	leading = state->indexInfo->ii_KeyAttrNumbers[0];
 
 	state->tupDesc = tupDesc;	/* assume we need not copy tupDesc */
 
@@ -899,7 +901,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 			(scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
 		sortKey->ssup_attno = scanKey->sk_attno;
 		/* Convey if abbreviation optimization is applicable in principle */
-		sortKey->abbreviate = (i == 0);
+		sortKey->abbreviate = (i == 0 && leading != 0);
 
 		AssertState(sortKey->ssup_attno != 0);
 
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index 097ac2b0060..4b887a9391d 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -473,10 +473,131 @@ where row(hundred, thousand, tenthous) <= row(lhundred, lthousand, ltenthous);
 reset enable_indexscan;
 reset maintenance_work_mem;
 reset replacement_sort_tuples;
+-- test CLUSTER on expression index
+CREATE TABLE clstr_expression(id serial primary key, a int, b text COLLATE "C");
+INSERT INTO clstr_expression(a, b) SELECT g.i % 42, 'prefix'||g.i FROM generate_series(1, 133) g(i);
+CREATE INDEX clstr_expression_minus_a ON clstr_expression ((-a), b);
+CREATE INDEX clstr_expression_upper_b ON clstr_expression ((upper(b)));
+-- verify indexes work before cluster
+BEGIN;
+SET LOCAL enable_seqscan = false;
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Index Scan using clstr_expression_upper_b on clstr_expression
+   Index Cond: (upper(b) = 'PREFIX3'::text)
+(2 rows)
+
+SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+ id | a |    b    
+----+---+---------
+  3 | 3 | prefix3
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Index Scan using clstr_expression_minus_a on clstr_expression
+   Index Cond: ((- a) = '-3'::integer)
+(2 rows)
+
+SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+ id  | a |     b     
+-----+---+-----------
+ 129 | 3 | prefix129
+   3 | 3 | prefix3
+  45 | 3 | prefix45
+  87 | 3 | prefix87
+(4 rows)
+
+COMMIT;
+-- and after clustering on clstr_expression_minus_a
+CLUSTER clstr_expression USING clstr_expression_minus_a;
+WITH rows AS
+  (SELECT ctid, lag(a) OVER (ORDER BY ctid) AS la, a FROM clstr_expression)
+SELECT * FROM rows WHERE la < a;
+ ctid | la | a 
+------+----+---
+(0 rows)
+
+BEGIN;
+SET LOCAL enable_seqscan = false;
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Index Scan using clstr_expression_upper_b on clstr_expression
+   Index Cond: (upper(b) = 'PREFIX3'::text)
+(2 rows)
+
+SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+ id | a |    b    
+----+---+---------
+  3 | 3 | prefix3
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Index Scan using clstr_expression_minus_a on clstr_expression
+   Index Cond: ((- a) = '-3'::integer)
+(2 rows)
+
+SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+ id  | a |     b     
+-----+---+-----------
+ 129 | 3 | prefix129
+   3 | 3 | prefix3
+  45 | 3 | prefix45
+  87 | 3 | prefix87
+(4 rows)
+
+COMMIT;
+-- and after clustering on clstr_expression_upper_b
+CLUSTER clstr_expression USING clstr_expression_upper_b;
+WITH rows AS
+  (SELECT ctid, lag(b) OVER (ORDER BY ctid) AS lb, b FROM clstr_expression)
+SELECT * FROM rows WHERE upper(lb) > upper(b);
+ ctid | lb | b 
+------+----+---
+(0 rows)
+
+BEGIN;
+SET LOCAL enable_seqscan = false;
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Index Scan using clstr_expression_upper_b on clstr_expression
+   Index Cond: (upper(b) = 'PREFIX3'::text)
+(2 rows)
+
+SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+ id | a |    b    
+----+---+---------
+  3 | 3 | prefix3
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Index Scan using clstr_expression_minus_a on clstr_expression
+   Index Cond: ((- a) = '-3'::integer)
+(2 rows)
+
+SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+ id  | a |     b     
+-----+---+-----------
+ 129 | 3 | prefix129
+   3 | 3 | prefix3
+  45 | 3 | prefix45
+  87 | 3 | prefix87
+(4 rows)
+
+COMMIT;
 -- clean up
 DROP TABLE clustertest;
 DROP TABLE clstr_1;
 DROP TABLE clstr_2;
 DROP TABLE clstr_3;
 DROP TABLE clstr_4;
+DROP TABLE clstr_expression;
 DROP USER regress_clstr_user;
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 8dd9459bda0..c26eb8953ba 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -227,10 +227,53 @@ reset enable_indexscan;
 reset maintenance_work_mem;
 reset replacement_sort_tuples;
 
+-- test CLUSTER on expression index
+CREATE TABLE clstr_expression(id serial primary key, a int, b text COLLATE "C");
+INSERT INTO clstr_expression(a, b) SELECT g.i % 42, 'prefix'||g.i FROM generate_series(1, 133) g(i);
+CREATE INDEX clstr_expression_minus_a ON clstr_expression ((-a), b);
+CREATE INDEX clstr_expression_upper_b ON clstr_expression ((upper(b)));
+
+-- verify indexes work before cluster
+BEGIN;
+SET LOCAL enable_seqscan = false;
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+COMMIT;
+
+-- and after clustering on clstr_expression_minus_a
+CLUSTER clstr_expression USING clstr_expression_minus_a;
+WITH rows AS
+  (SELECT ctid, lag(a) OVER (ORDER BY ctid) AS la, a FROM clstr_expression)
+SELECT * FROM rows WHERE la < a;
+BEGIN;
+SET LOCAL enable_seqscan = false;
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+COMMIT;
+
+-- and after clustering on clstr_expression_upper_b
+CLUSTER clstr_expression USING clstr_expression_upper_b;
+WITH rows AS
+  (SELECT ctid, lag(b) OVER (ORDER BY ctid) AS lb, b FROM clstr_expression)
+SELECT * FROM rows WHERE upper(lb) > upper(b);
+BEGIN;
+SET LOCAL enable_seqscan = false;
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+SELECT * FROM clstr_expression WHERE upper(b) = 'PREFIX3';
+EXPLAIN (COSTS OFF) SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+SELECT * FROM clstr_expression WHERE -a = -3 ORDER BY -a, b;
+COMMIT;
+
 -- clean up
 DROP TABLE clustertest;
 DROP TABLE clstr_1;
 DROP TABLE clstr_2;
 DROP TABLE clstr_3;
 DROP TABLE clstr_4;
+DROP TABLE clstr_expression;
+
 DROP USER regress_clstr_user;
author	Peter Geoghegan <pg@bowt.ie>	2022-04-20 17:17:33 -0700
committer	Peter Geoghegan <pg@bowt.ie>	2022-04-20 17:17:33 -0700
commit	a903895b3fe49c4ceb6fbcea73e357493245771d (patch)
tree	b8e745d5635b83cf481cc54b282ad3cfd28ad3ef
parent	a1e4782a0bca3f19f05042c6f05675dbf4fcacc7 (diff)