summaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
Diffstat (limited to 'src/test')
-rw-r--r--src/test/regress/expected/create_table_like.out2
-rw-r--r--src/test/regress/expected/opr_sanity.out3
-rw-r--r--src/test/regress/expected/stats_ext.out287
-rw-r--r--src/test/regress/expected/type_sanity.out3
-rw-r--r--src/test/regress/sql/stats_ext.sql186
5 files changed, 475 insertions, 6 deletions
diff --git a/src/test/regress/expected/create_table_like.out b/src/test/regress/expected/create_table_like.out
index b5822112706..4c8a5bd7e2d 100644
--- a/src/test/regress/expected/create_table_like.out
+++ b/src/test/regress/expected/create_table_like.out
@@ -243,7 +243,7 @@ Indexes:
Check constraints:
"ctlt1_a_check" CHECK (length(a) > 2)
Statistics objects:
- "public"."ctlt_all_a_b_stat" (ndistinct, dependencies) ON a, b FROM ctlt_all
+ "public"."ctlt_all_a_b_stat" (ndistinct, dependencies, mcv) ON a, b FROM ctlt_all
SELECT c.relname, objsubid, description FROM pg_description, pg_index i, pg_class c WHERE classoid = 'pg_class'::regclass AND objoid = i.indexrelid AND c.oid = i.indexrelid AND i.indrelid = 'ctlt_all'::regclass ORDER BY c.relname, objsubid;
relname | objsubid | description
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 49a0acc0ee4..49fc313af06 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -892,11 +892,12 @@ WHERE c.castmethod = 'b' AND
pg_node_tree | text | 0 | i
pg_ndistinct | bytea | 0 | i
pg_dependencies | bytea | 0 | i
+ pg_mcv_list | bytea | 0 | i
cidr | inet | 0 | i
xml | text | 0 | a
xml | character varying | 0 | a
xml | character | 0 | a
-(9 rows)
+(10 rows)
-- **************** pg_conversion ****************
-- Look for illegal values in pg_conversion fields.
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 05c200717f1..b32663459d6 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -58,7 +58,7 @@ ALTER TABLE ab1 DROP COLUMN a;
b | integer | | |
c | integer | | |
Statistics objects:
- "public"."ab1_b_c_stats" (ndistinct, dependencies) ON b, c FROM ab1
+ "public"."ab1_b_c_stats" (ndistinct, dependencies, mcv) ON b, c FROM ab1
-- Ensure statistics are dropped when table is
SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
@@ -211,7 +211,7 @@ SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
stxkind | stxndistinct
---------+---------------------------------------------------------
- {d,f} | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301}
+ {d,f,m} | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301}
(1 row)
-- Hash Aggregate, thanks to estimates improved by the statistic
@@ -277,7 +277,7 @@ SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
stxkind | stxndistinct
---------+-------------------------------------------------------------
- {d,f} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000}
+ {d,f,m} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000}
(1 row)
-- plans using Group Aggregate, thanks to using correct esimates
@@ -514,3 +514,284 @@ EXPLAIN (COSTS OFF)
(5 rows)
RESET random_page_cost;
+-- check the number of estimated/actual rows in the top node
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+ ln text;
+ tmp text[];
+ first_row bool := true;
+begin
+ for ln in
+ execute format('explain analyze %s', $1)
+ loop
+ if first_row then
+ first_row := false;
+ tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+ return query select tmp[1]::int, tmp[2]::int;
+ end if;
+ end loop;
+end;
+$$;
+-- MCV lists
+CREATE TABLE mcv_lists (
+ filler1 TEXT,
+ filler2 NUMERIC,
+ a INT,
+ b VARCHAR,
+ filler3 DATE,
+ c INT,
+ d TEXT
+);
+-- random data (no MCV list)
+INSERT INTO mcv_lists (a, b, c, filler1)
+ SELECT mod(i,37), mod(i,41), mod(i,43), mod(i,47) FROM generate_series(1,5000) s(i);
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+ estimated | actual
+-----------+--------
+ 3 | 4
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual
+-----------+--------
+ 1 | 1
+(1 row)
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+ estimated | actual
+-----------+--------
+ 3 | 4
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual
+-----------+--------
+ 1 | 1
+(1 row)
+
+-- 100 distinct combinations, all in the MCV list
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+INSERT INTO mcv_lists (a, b, c, filler1)
+ SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1''');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0''');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1''');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0''');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+-- check change of unrelated column type does not reset the MCV statistics
+ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
+SELECT stxmcv IS NOT NULL FROM pg_statistic_ext WHERE stxname = 'mcv_lists_stats';
+ ?column?
+----------
+ t
+(1 row)
+
+-- check change of column type resets the MCV statistics
+ALTER TABLE mcv_lists ALTER COLUMN c TYPE numeric;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+-- 100 distinct combinations with NULL values, all in the MCV list
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+INSERT INTO mcv_lists (a, b, c, filler1)
+ SELECT
+ (CASE WHEN mod(i,100) = 1 THEN NULL ELSE mod(i,100) END),
+ (CASE WHEN mod(i,50) = 1 THEN NULL ELSE mod(i,50) END),
+ (CASE WHEN mod(i,25) = 1 THEN NULL ELSE mod(i,25) END),
+ i
+ FROM generate_series(1,5000) s(i);
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+RESET random_page_cost;
+-- mcv with arrays
+CREATE TABLE mcv_lists_arrays (
+ a TEXT[],
+ b NUMERIC[],
+ c INT[]
+);
+INSERT INTO mcv_lists_arrays (a, b, c)
+ SELECT
+ ARRAY[md5((i/100)::text), md5((i/100-1)::text), md5((i/100+1)::text)],
+ ARRAY[(i/100-1)::numeric/1000, (i/100)::numeric/1000, (i/100+1)::numeric/1000],
+ ARRAY[(i/100-1), i/100, (i/100+1)]
+ FROM generate_series(1,5000) s(i);
+CREATE STATISTICS mcv_lists_arrays_stats (mcv) ON a, b, c
+ FROM mcv_lists_arrays;
+ANALYZE mcv_lists_arrays;
+-- mcv with bool
+CREATE TABLE mcv_lists_bool (
+ a BOOL,
+ b BOOL,
+ c BOOL
+);
+INSERT INTO mcv_lists_bool (a, b, c)
+ SELECT
+ (mod(i,2) = 0), (mod(i,4) = 0), (mod(i,8) = 0)
+ FROM generate_series(1,10000) s(i);
+ANALYZE mcv_lists_bool;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE a AND b AND c');
+ estimated | actual
+-----------+--------
+ 156 | 1250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c');
+ estimated | actual
+-----------+--------
+ 156 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c');
+ estimated | actual
+-----------+--------
+ 469 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
+ estimated | actual
+-----------+--------
+ 1094 | 0
+(1 row)
+
+CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
+ FROM mcv_lists_bool;
+ANALYZE mcv_lists_bool;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE a AND b AND c');
+ estimated | actual
+-----------+--------
+ 1250 | 1250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out
index 6d19c230f20..8f7f532f416 100644
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -72,7 +72,8 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
194 | pg_node_tree
3361 | pg_ndistinct
3402 | pg_dependencies
-(3 rows)
+ 5017 | pg_mcv_list
+(4 rows)
-- Make sure typarray points to a varlena array type of our own base
SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype,
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 12d709cb133..2a6a23c06d4 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -280,3 +280,189 @@ EXPLAIN (COSTS OFF)
SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
RESET random_page_cost;
+
+-- check the number of estimated/actual rows in the top node
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+ ln text;
+ tmp text[];
+ first_row bool := true;
+begin
+ for ln in
+ execute format('explain analyze %s', $1)
+ loop
+ if first_row then
+ first_row := false;
+ tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+ return query select tmp[1]::int, tmp[2]::int;
+ end if;
+ end loop;
+end;
+$$;
+
+-- MCV lists
+CREATE TABLE mcv_lists (
+ filler1 TEXT,
+ filler2 NUMERIC,
+ a INT,
+ b VARCHAR,
+ filler3 DATE,
+ c INT,
+ d TEXT
+);
+
+-- random data (no MCV list)
+INSERT INTO mcv_lists (a, b, c, filler1)
+ SELECT mod(i,37), mod(i,41), mod(i,43), mod(i,47) FROM generate_series(1,5000) s(i);
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
+
+-- 100 distinct combinations, all in the MCV list
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+
+INSERT INTO mcv_lists (a, b, c, filler1)
+ SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
+
+-- check change of unrelated column type does not reset the MCV statistics
+ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
+
+SELECT stxmcv IS NOT NULL FROM pg_statistic_ext WHERE stxname = 'mcv_lists_stats';
+
+-- check change of column type resets the MCV statistics
+ALTER TABLE mcv_lists ALTER COLUMN c TYPE numeric;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
+
+-- 100 distinct combinations with NULL values, all in the MCV list
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+
+INSERT INTO mcv_lists (a, b, c, filler1)
+ SELECT
+ (CASE WHEN mod(i,100) = 1 THEN NULL ELSE mod(i,100) END),
+ (CASE WHEN mod(i,50) = 1 THEN NULL ELSE mod(i,50) END),
+ (CASE WHEN mod(i,25) = 1 THEN NULL ELSE mod(i,25) END),
+ i
+ FROM generate_series(1,5000) s(i);
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL');
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL');
+
+RESET random_page_cost;
+
+-- mcv with arrays
+CREATE TABLE mcv_lists_arrays (
+ a TEXT[],
+ b NUMERIC[],
+ c INT[]
+);
+
+INSERT INTO mcv_lists_arrays (a, b, c)
+ SELECT
+ ARRAY[md5((i/100)::text), md5((i/100-1)::text), md5((i/100+1)::text)],
+ ARRAY[(i/100-1)::numeric/1000, (i/100)::numeric/1000, (i/100+1)::numeric/1000],
+ ARRAY[(i/100-1), i/100, (i/100+1)]
+ FROM generate_series(1,5000) s(i);
+
+CREATE STATISTICS mcv_lists_arrays_stats (mcv) ON a, b, c
+ FROM mcv_lists_arrays;
+
+ANALYZE mcv_lists_arrays;
+
+-- mcv with bool
+CREATE TABLE mcv_lists_bool (
+ a BOOL,
+ b BOOL,
+ c BOOL
+);
+
+INSERT INTO mcv_lists_bool (a, b, c)
+ SELECT
+ (mod(i,2) = 0), (mod(i,4) = 0), (mod(i,8) = 0)
+ FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_lists_bool;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE a AND b AND c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
+
+CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
+ FROM mcv_lists_bool;
+
+ANALYZE mcv_lists_bool;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE a AND b AND c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');