From 7b504eb282ca2f5104b5c00b4f05a3ef6bb1385b Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Fri, 24 Mar 2017 14:06:10 -0300 Subject: Implement multivariate n-distinct coefficients MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for explicitly declared statistic objects (CREATE STATISTICS), allowing collection of statistics on more complex combinations that individual table columns. Companion commands DROP STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are added too. All this DDL has been designed so that more statistic types can be added later on, such as multivariate most-common-values and multivariate histograms between columns of a single table, leaving room for permitting columns on multiple tables, too, as well as expressions. This commit only adds support for collection of n-distinct coefficient on user-specified sets of columns in a single table. This is useful to estimate number of distinct groups in GROUP BY and DISTINCT clauses; estimation errors there can cause over-allocation of memory in hashed aggregates, for instance, so it's a worthwhile problem to solve. A new special pseudo-type pg_ndistinct is used. (num-distinct estimation was deemed sufficiently useful by itself that this is worthwhile even if no further statistic types are added immediately; so much so that another version of essentially the same functionality was submitted by Kyotaro Horiguchi: https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp though this commit does not use that code.) Author: Tomas Vondra. Some code rework by Álvaro. Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes, Ideriha Takeshi Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql --- doc/src/sgml/catalogs.sgml | 97 ++++++++++++++++++++ doc/src/sgml/func.sgml | 36 +++++--- doc/src/sgml/ref/allfiles.sgml | 3 + doc/src/sgml/ref/alter_statistics.sgml | 115 ++++++++++++++++++++++++ doc/src/sgml/ref/alter_table.sgml | 9 +- doc/src/sgml/ref/comment.sgml | 6 +- doc/src/sgml/ref/create_statistics.sgml | 155 ++++++++++++++++++++++++++++++++ doc/src/sgml/ref/drop_statistics.sgml | 98 ++++++++++++++++++++ doc/src/sgml/reference.sgml | 3 + 9 files changed, 504 insertions(+), 18 deletions(-) create mode 100644 doc/src/sgml/ref/alter_statistics.sgml create mode 100644 doc/src/sgml/ref/create_statistics.sgml create mode 100644 doc/src/sgml/ref/drop_statistics.sgml (limited to 'doc/src') diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index c531c73aac9..ac39c639edc 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -295,6 +295,11 @@ planner statistics + + pg_statistic_ext + extended planner statistics + + pg_subscription logical replication subscriptions @@ -4247,6 +4252,98 @@ + + <structname>pg_statistic_ext</structname> + + + pg_statistic_ext + + + + The catalog pg_statistic_ext + holds extended planner statistics. + + + + <structname>pg_statistic_ext</> Columns + + + + + Name + Type + References + Description + + + + + + + starelid + oid + pg_class.oid + The table that the described columns belongs to + + + + staname + name + + Name of the statistic. + + + + stanamespace + oid + pg_namespace.oid + + The OID of the namespace that contains this statistic + + + + + staowner + oid + pg_authid.oid + Owner of the statistic + + + + staenabled + char[] + + + An array with the modes of the enabled statistic types, encoded as + d for ndistinct coefficients. + + + + + stakeys + int2vector + pg_attribute.attnum + + This is an array of values that indicate which table columns this + statistic covers. For example a value of 1 3 would + mean that the first and the third table columns make up the statistic key. + + + + + standistinct + pg_ndistinct + + + N-distinct coefficients, serialized as pg_ndistinct type. + + + + + +
+
+ <structname>pg_namespace</structname> diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 3f0f7363b9b..ba6f8dd8d2d 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -16720,6 +16720,10 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); pg_get_serial_sequence + + pg_get_statisticsextdef + + pg_get_triggerdef @@ -16889,6 +16893,11 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); get name of the sequence that a serial, smallserial or bigserial column uses
+ + pg_get_statisticsextdef(statext_oid) + text + get CREATE STATISTICS command for extended statistics objects + pg_get_triggerdef(trigger_oid) text @@ -17034,19 +17043,20 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); pg_get_constraintdef, pg_get_indexdef, pg_get_ruledef, - and pg_get_triggerdef, respectively reconstruct the - creating command for a constraint, index, rule, or trigger. (Note that this - is a decompiled reconstruction, not the original text of the command.) - pg_get_expr decompiles the internal form of an - individual expression, such as the default value for a column. It can be - useful when examining the contents of system catalogs. If the expression - might contain Vars, specify the OID of the relation they refer to as the - second parameter; if no Vars are expected, zero is sufficient. - pg_get_viewdef reconstructs the SELECT - query that defines a view. Most of these functions come in two variants, - one of which can optionally pretty-print the result. The - pretty-printed format is more readable, but the default format is more - likely to be interpreted the same way by future versions of + pg_get_statisticsextdef, and + pg_get_triggerdef, respectively reconstruct the + creating command for a constraint, index, rule, extended statistics object, + or trigger. (Note that this is a decompiled reconstruction, not the + original text of the command.) pg_get_expr decompiles + the internal form of an individual expression, such as the default value + for a column. It can be useful when examining the contents of system + catalogs. If the expression might contain Vars, specify the OID of the + relation they refer to as the second parameter; if no Vars are expected, + zero is sufficient. pg_get_viewdef reconstructs the + SELECT query that defines a view. Most of these functions come + in two variants, one of which can optionally pretty-print the + result. The pretty-printed format is more readable, but the default format + is more likely to be interpreted the same way by future versions of PostgreSQL; avoid using pretty-printed output for dump purposes. Passing false for the pretty-print parameter yields the same result as the variant that does not have the parameter at all. diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml index 974e1b74e42..01acc2ef9da 100644 --- a/doc/src/sgml/ref/allfiles.sgml +++ b/doc/src/sgml/ref/allfiles.sgml @@ -34,6 +34,7 @@ Complete list of usable sgml source files in this directory. + @@ -80,6 +81,7 @@ Complete list of usable sgml source files in this directory. + @@ -126,6 +128,7 @@ Complete list of usable sgml source files in this directory. + diff --git a/doc/src/sgml/ref/alter_statistics.sgml b/doc/src/sgml/ref/alter_statistics.sgml new file mode 100644 index 00000000000..3e4d28614a2 --- /dev/null +++ b/doc/src/sgml/ref/alter_statistics.sgml @@ -0,0 +1,115 @@ + + + + + ALTER STATISTICS + + + + ALTER STATISTICS + 7 + SQL - Language Statements + + + + ALTER STATISTICS + + change the definition of a extended statistics + + + + + +ALTER STATISTICS name OWNER TO { new_owner | CURRENT_USER | SESSION_USER } +ALTER STATISTICS name RENAME TO new_name +ALTER STATISTICS name SET SCHEMA new_schema + + + + + Description + + + ALTER STATISTICS changes the parameters of an existing + extended statistics. Any parameters not specifically set in the + ALTER STATISTICS command retain their prior settings. + + + + You must own the statistics to use ALTER STATISTICS. + To change a statistics' schema, you must also have CREATE + privilege on the new schema. + To alter the owner, you must also be a direct or indirect member of the new + owning role, and that role must have CREATE privilege on + the statistics' schema. (These restrictions enforce that altering the owner + doesn't do anything you couldn't do by dropping and recreating the statistics. + However, a superuser can alter ownership of any statistics anyway.) + + + + + Parameters + + + + + name + + + The name (optionally schema-qualified) of the statistics to be altered. + + + + + + new_owner + + + The user name of the new owner of the statistics. + + + + + + new_name + + + The new name for the statistics. + + + + + + new_schema + + + The new schema for the statistics. + + + + + + + + + + Compatibility + + + There's no ALTER STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + + diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 767ea321da7..75de2262539 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -119,9 +119,12 @@ ALTER TABLE [ IF EXISTS ] name This form drops a column from a table. Indexes and table constraints involving the column will be automatically - dropped as well. You will need to say CASCADE if - anything outside the table depends on the column, for example, - foreign key references or views. + dropped as well. + Multivariate statistics referencing the dropped column will also be + removed if the removal of the column would cause the statistics to + contain data for only a single column. + You will need to say CASCADE if anything outside the table + depends on the column, for example, foreign key references or views. If IF EXISTS is specified and the column does not exist, no error is thrown. In this case a notice is issued instead. diff --git a/doc/src/sgml/ref/comment.sgml b/doc/src/sgml/ref/comment.sgml index 7483c8c03fc..8fe17a5767d 100644 --- a/doc/src/sgml/ref/comment.sgml +++ b/doc/src/sgml/ref/comment.sgml @@ -51,6 +51,7 @@ COMMENT ON SCHEMA object_name | SEQUENCE object_name | SERVER object_name | + STATISTICS object_name | TABLE object_name | TABLESPACE object_name | TEXT SEARCH CONFIGURATION object_name | @@ -125,8 +126,8 @@ COMMENT ON The name of the object to be commented. Names of tables, aggregates, collations, conversions, domains, foreign tables, functions, indexes, operators, operator classes, operator families, sequences, - text search objects, types, and views can be schema-qualified. - When commenting on a column, + statistics, text search objects, types, and views can be + schema-qualified. When commenting on a column, relation_name must refer to a table, view, composite type, or foreign table. @@ -327,6 +328,7 @@ COMMENT ON RULE my_rule ON my_table IS 'Logs updates of employee records'; COMMENT ON SCHEMA my_schema IS 'Departmental data'; COMMENT ON SEQUENCE my_sequence IS 'Used to generate primary keys'; COMMENT ON SERVER myserver IS 'my foreign server'; +COMMENT ON STATISTICS my_statistics IS 'Improves planner row estimations'; COMMENT ON TABLE my_schema.my_table IS 'Employee Information'; COMMENT ON TABLESPACE my_tablespace IS 'Tablespace for indexes'; COMMENT ON TEXT SEARCH CONFIGURATION my_config IS 'Special word filtering'; diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml new file mode 100644 index 00000000000..60184a347bf --- /dev/null +++ b/doc/src/sgml/ref/create_statistics.sgml @@ -0,0 +1,155 @@ + + + + + CREATE STATISTICS + + + + CREATE STATISTICS + 7 + SQL - Language Statements + + + + CREATE STATISTICS + define extended statistics + + + + +CREATE STATISTICS [ IF NOT EXISTS ] statistics_name ON ( + column_name, column_name [, ...]) + FROM table_name + + + + + + Description + + + CREATE STATISTICS will create a new extended statistics + object on the specified table. + The statistics will be created in the current database and + will be owned by the user issuing the command. + + + + If a schema name is given (for example, CREATE STATISTICS + myschema.mystat ...) then the statistics is created in the specified + schema. Otherwise it is created in the current schema. The name of + the statistics must be distinct from the name of any other statistics in the + same schema. + + + + + Parameters + + + + + IF NOT EXISTS + + + Do not throw an error if a statistics with the same name already exists. + A notice is issued in this case. Note that only the name of the + statistics object is considered here. The definition of the statistics is + not considered. + + + + + + statistics_name + + + The name (optionally schema-qualified) of the statistics to be created. + + + + + + column_name + + + The name of a column to be included in the statistics. + + + + + + table_name + + + The name (optionally schema-qualified) of the table the statistics should + be created on. + + + + + + + + + + Notes + + + You must be the owner of a table to create or change statistics on it. + + + + + Examples + + + Create table t1 with two functionally dependent columns, i.e. + knowledge of a value in the first column is sufficient for determining the + value in the other column. Then functional dependencies are built on those + columns: + + +CREATE TABLE t1 ( + a int, + b int +); + +INSERT INTO t1 SELECT i/100, i/500 + FROM generate_series(1,1000000) s(i); + +CREATE STATISTICS s1 ON (a, b) FROM t1; + +ANALYZE t1; + +-- valid combination of values +EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0); + +-- invalid combination of values +EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1); + + + + + + + Compatibility + + + There's no CREATE STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + diff --git a/doc/src/sgml/ref/drop_statistics.sgml b/doc/src/sgml/ref/drop_statistics.sgml new file mode 100644 index 00000000000..98c338182b4 --- /dev/null +++ b/doc/src/sgml/ref/drop_statistics.sgml @@ -0,0 +1,98 @@ + + + + + DROP STATISTICS + + + + DROP STATISTICS + 7 + SQL - Language Statements + + + + DROP STATISTICS + remove extended statistics + + + + +DROP STATISTICS [ IF EXISTS ] name [, ...] + + + + + Description + + + DROP STATISTICS removes statistics from the database. + Only the statistics owner, the schema owner, and superuser can drop a + statistics. + + + + + + Parameters + + + + IF EXISTS + + + Do not throw an error if the statistics do not exist. A notice is + issued in this case. + + + + + + name + + + The name (optionally schema-qualified) of the statistics to drop. + + + + + + + + + Examples + + + To destroy two statistics objects on different schemas, without failing + if they don't exist: + + +DROP STATISTICS IF EXISTS + accounting.users_uid_creation, + public.grants_user_role; + + + + + + + Compatibility + + + There's no DROP STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + + diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml index 3d8ad232fa3..9000b3aaaa7 100644 --- a/doc/src/sgml/reference.sgml +++ b/doc/src/sgml/reference.sgml @@ -60,6 +60,7 @@ &alterSchema; &alterSequence; &alterServer; + &alterStatistics; &alterSubscription; &alterSystem; &alterTable; @@ -108,6 +109,7 @@ &createSchema; &createSequence; &createServer; + &createStatistics; &createSubscription; &createTable; &createTableAs; @@ -154,6 +156,7 @@ &dropSchema; &dropSequence; &dropServer; + &dropStatistics; &dropSubscription; &dropTable; &dropTableSpace; -- cgit v1.2.3