From f0e44751d7175fa3394da2c8f85e3ceb3cdbfe63 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 7 Dec 2016 13:17:43 -0500 Subject: Implement table partitioning. Table partitioning is like table inheritance and reuses much of the existing infrastructure, but there are some important differences. The parent is called a partitioned table and is always empty; it may not have indexes or non-inherited constraints, since those make no sense for a relation with no data of its own. The children are called partitions and contain all of the actual data. Each partition has an implicit partitioning constraint. Multiple inheritance is not allowed, and partitioning and inheritance can't be mixed. Partitions can't have extra columns and may not allow nulls unless the parent does. Tuples inserted into the parent are automatically routed to the correct partition, so tuple-routing ON INSERT triggers are not needed. Tuple routing isn't yet supported for partitions which are foreign tables, and it doesn't handle updates that cross partition boundaries. Currently, tables can be range-partitioned or list-partitioned. List partitioning is limited to a single column, but range partitioning can involve multiple columns. A partitioning "column" can be an expression. Because table partitioning is less general than table inheritance, it is hoped that it will be easier to reason about properties of partitions, and therefore that this will serve as a better foundation for a variety of possible optimizations, including query planner optimizations. The tuple routing based which this patch does based on the implicit partitioning constraints is an example of this, but it seems likely that many other useful optimizations are also possible. Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat, Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova, Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me. --- doc/src/sgml/catalogs.sgml | 129 +++++++++++++++++++++++- doc/src/sgml/ref/alter_table.sgml | 117 +++++++++++++++++++++- doc/src/sgml/ref/create_foreign_table.sgml | 26 +++++ doc/src/sgml/ref/create_table.sgml | 154 +++++++++++++++++++++++++++++ 4 files changed, 421 insertions(+), 5 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index c4246dcd866..9d2e89523d6 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -225,6 +225,11 @@ template data for procedural languages + + pg_partitioned_table + information about partition key of tables + + pg_policy row-security policies @@ -1723,7 +1728,8 @@ char - r = ordinary table, i = index, + r = ordinary table, P = partitioned table, + i = index S = sequence, v = view, m = materialized view, c = composite type, t = TOAST table, @@ -1839,6 +1845,13 @@ + + relispartition + bool + + True if table is a partition + + relfrozenxid xid @@ -1885,6 +1898,16 @@ Access-method-specific options, as keyword=value strings + + + relpartbound + pg_node_tree + + + If table is a partition (see relispartition), + internal representation of the partition bound + + @@ -4689,6 +4712,110 @@ + + <structname>pg_partitioned_table</structname> + + + pg_partitioned_table + + + + The catalog pg_partitioned_table stores + information about how tables are partitioned. + + + + <structname>pg_partitioned_table</> Columns + + + + + Name + Type + References + Description + + + + + + + partrelid + oid + pg_class.oid + The OID of the pg_class entry for this partitioned table + + + + partstrat + char + + + Partitioning strategy; l = list partitioned table, + r = range partitioned table + + + + + partnatts + int2 + + The number of columns in partition key + + + + partattrs + int2vector + pg_attribute.attnum + + This is an array of partnatts values that + indicate which table columns are part of the partition key. For + example, a value of 1 3 would mean that the first + and the third table columns make up the partition key. A zero in this + array indicates that the corresponding partition key column is an + expression, rather than a simple column reference. + + + + + partclass + oidvector + pg_opclass.oid + + For each column in the partition key, this contains the OID of the + operator class to use. See + pg_opclass for details. + + + + + partcollation + oidvector + pg_opclass.oid + + For each column in the partition key, this contains the OID of the + the collation to use for partitioning. + + + + + partexprs + pg_node_tree + + + Expression trees (in nodeToString() + representation) for partition key columns that are not simple column + references. This is a list with one element for each zero + entry in partattrs. Null if all partition key columns + are simple references. + + + + + +
+
+ <structname>pg_policy</structname> diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index e48ccf21e4f..a6a43c4b302 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -33,6 +33,10 @@ ALTER TABLE [ IF EXISTS ] name SET SCHEMA new_schema ALTER TABLE ALL IN TABLESPACE name [ OWNED BY role_name [, ... ] ] SET TABLESPACE new_tablespace [ NOWAIT ] +ALTER TABLE [ IF EXISTS ] name + ATTACH PARTITION partition_name FOR VALUES partition_bound_spec +ALTER TABLE [ IF EXISTS ] name + DETACH PARTITION partition_name where action is one of: @@ -166,6 +170,12 @@ ALTER TABLE ALL IN TABLESPACE name values or to reject null values. You can only use SET NOT NULL when the column contains no null values. + + + If this table is a partition, one cannot perform DROP NOT NULL + on a column if it is marked NOT NULL in the parent + table. + @@ -704,13 +714,63 @@ ALTER TABLE ALL IN TABLESPACE name + + ATTACH PARTITION partition_name partition_bound_spec + + + This form attaches an existing table (which might itself be partitioned) + as a partition of the target table using the same syntax for + partition_bound_spec as + . The partition bound specification + must correspond to the partitioning strategy and partition key of the + target table. The table to be attached must have all the same columns + as the target table and no more; moreover, the column types must also + match. Also, it must have all the NOT NULL and + CHECK constraints of the target table. Currently + UNIQUE, PRIMARY KEY, and + FOREIGN KEY constraints are not considered. + If any of the CHECK constraints of the table being + attached is marked NO INHERIT, the command will fail; + such a constraint must be recreated without the NO INHERIT + clause. + + + + A full table scan is performed on the table being attached to check that + no existing row in the table violates the partition constraint. It is + possible to avoid this scan by adding a valid CHECK + constraint to the table that would allow only the rows satisfying the + desired partition constraint before running this command. It will be + determined using such a constraint that the table need not be scanned + to validate the partition constraint. This does not work, however, if + any of the partition keys is an expression and the partition does not + accept NULL values. If attaching a list partition + that will not accept NULL values, also add + NOT NULL constraint to the partition key column, + unless it's an expression. + + + + + + DETACH PARTITION partition_name + + + This form detaches specified partition of the target table. The detached + partition continues to exist as a standalone table, but no longer has any + ties to the table from which it was detached. + + + + All the actions except RENAME, - SET TABLESPACE and SET SCHEMA - can be combined into + SET TABLESPACE, SET SCHEMA, + ATTACH PARTITION, and + DETACH PARTITION can be combined into a list of multiple alterations to apply in parallel. For example, it is possible to add several columns and/or alter the type of several columns in a single command. This is particularly useful with large @@ -721,8 +781,9 @@ ALTER TABLE ALL IN TABLESPACE name You must own the table to use ALTER TABLE. To change the schema or tablespace of a table, you must also have CREATE privilege on the new schema or tablespace. - To add the table as a new child of a parent table, you must own the - parent table as well. + To add the table as a new child of a parent table, you must own the parent + table as well. Also, to attach a table as a new partition of the table, + you must own the table being attached. To alter the owner, you must also be a direct or indirect member of the new owning role, and that role must have CREATE privilege on the table's schema. (These restrictions enforce that altering the owner @@ -938,6 +999,25 @@ ALTER TABLE ALL IN TABLESPACE name + + partition_name + + + The name of the table to attach as a new partition or to detach from this table. + + + + + + partition_bound_spec + + + The partition bound specification for a new partition. Refer to + for more details on the syntax of the same. + + + + @@ -977,6 +1057,11 @@ ALTER TABLE ALL IN TABLESPACE name but does not require a table rewrite. + + Similarly, when attaching a new partition it may be scanned to verify that + existing rows meet the partition constraint. + + The main reason for providing the option to specify multiple changes in a single ALTER TABLE is that multiple table scans or @@ -1047,6 +1132,9 @@ ALTER TABLE ALL IN TABLESPACE name COLUMN (i.e., ALTER TABLE ONLY ... DROP COLUMN) never removes any descendant columns, but instead marks them as independently defined rather than inherited. + A nonrecursive DROP COLUMN command will fail for a + partitioned table, because all partitions of a table must have the same + columns as the partitioning root. @@ -1233,6 +1321,27 @@ ALTER TABLE distributors DROP CONSTRAINT distributors_pkey, ADD CONSTRAINT distributors_pkey PRIMARY KEY USING INDEX dist_id_temp_idx; + + Attach a partition to range partitioned table: + +ALTER TABLE measurement + ATTACH PARTITION measurement_y2016m07 FOR VALUES FROM ('2016-07-01') TO ('2016-08-01'); + + + + Attach a partition to list partitioned table: + +ALTER TABLE cities + ATTACH PARTITION cities_west FOR VALUES IN ('Los Angeles', 'San Francisco'); + + + + Detach a partition from partitioned table: + +ALTER TABLE cities + DETACH PARTITION measurement_y2015m12; + + diff --git a/doc/src/sgml/ref/create_foreign_table.sgml b/doc/src/sgml/ref/create_foreign_table.sgml index 413b033cb57..5d0dcf567b5 100644 --- a/doc/src/sgml/ref/create_foreign_table.sgml +++ b/doc/src/sgml/ref/create_foreign_table.sgml @@ -27,6 +27,15 @@ CREATE FOREIGN TABLE [ IF NOT EXISTS ] table_name SERVER server_name [ OPTIONS ( option 'value' [, ... ] ) ] +CREATE FOREIGN TABLE [ IF NOT EXISTS ] table_name + PARTITION OF parent_table [ ( + { column_name WITH OPTIONS [ column_constraint [ ... ] ] + | table_constraint } + [, ... ] +) ] partition_bound_spec + SERVER server_name +[ OPTIONS ( option 'value' [, ... ] ) ] + where column_constraint is: [ CONSTRAINT constraint_name ] @@ -67,6 +76,12 @@ CHECK ( expression ) [ NO INHERIT ] name as any existing data type in the same schema. + + If PARTITION OF clause is specified then the table is + created as a partition of parent_table with specified + bounds. + + To be able to create a foreign table, you must have USAGE privilege on the foreign server, as well as USAGE @@ -314,6 +329,17 @@ CREATE FOREIGN TABLE films ( SERVER film_server; + + Create foreign table measurement_y2016m07, which will be + accessed through the server server_07, as a partition + of the range partitioned table measurement: + + +CREATE FOREIGN TABLE measurement_y2016m07 + PARTITION OF measurement FOR VALUES FROM ('2016-07-01') TO ('2016-08-01') + SERVER server_07; + + diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index bf2ad64d66e..8bf8af302b5 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -28,6 +28,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI [, ... ] ] ) [ INHERITS ( parent_table [, ... ] ) ] +[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -38,6 +39,18 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI | table_constraint } [, ... ] ) ] +[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] +[ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] +[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] +[ TABLESPACE tablespace_name ] + +CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] table_name + PARTITION OF parent_table [ ( + { column_name [ column_constraint [ ... ] ] + | table_constraint } + [, ... ] +) ] FOR VALUES partition_bound_spec +[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -70,6 +83,11 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI { INCLUDING | EXCLUDING } { DEFAULTS | CONSTRAINTS | INDEXES | STORAGE | COMMENTS | ALL } +and partition_bound_spec is: + +{ IN ( expression [, ...] ) | + FROM ( { expression | UNBOUNDED } [, ...] ) TO ( { expression | UNBOUNDED } [, ...] ) } + index_parameters in UNIQUE, PRIMARY KEY, and EXCLUDE constraints are: [ WITH ( storage_parameter [= value] [, ... ] ) ] @@ -229,6 +247,51 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI + + PARTITION OF parent_table + + + Creates the table as partition of the specified + parent table. + + + + The partition bound specification must correspond to the partitioning + method and partition key of the parent table, and must not overlap with + any existing partition of that parent. + + + + A partition cannot have columns other than those inherited from the + parent. That includes the oid column, which can be + specified using the WITH (OIDS) clause. + Defaults and constraints can optionally be specified for each of the + inherited columns. One can also specify table constraints in addition + to those inherited from the parent. If a check constraint with the name + matching one of the parent's constraint is specified, it is merged with + the latter, provided the specified condition is same. + + + + Rows inserted into a partitioned table will be automatically routed to + the correct partition. If no suitable partition exists, an error will + occur. + + + + A partition must have the same column names and types as the table of + which it is a partition. Therefore, modifications to the column names + or types of the partitioned table will automatically propagate to all + children, as will operations such as TRUNCATE which normally affect a + table and all of its inheritance children. It is also possible to + TRUNCATE a partition individually, just as for an inheritance child. + Note that dropping a partition with DROP TABLE + requires taking an ACCESS EXCLUSIVE lock on the + parent table. + + + + column_name @@ -313,6 +376,46 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI + + PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ opclass ] [, ...] ) + + + The optional PARTITION BY clause specifies a strategy + of partitioning the table. The table thus created is called a + partitioned table. The parenthesized list of + columns or expressions forms the partition key + for the table. When using range partitioning, the partition key can + include multiple columns or expressions, but for list partitioning, the + partition key must consist of a single column or expression. If no + btree operator class is specified when creating a partitioned table, + the default btree operator class for the datatype will be used. If + there is none, an error will be reported. + + + + A partitioned table is divided into sub-tables (called partitions), + which are created using separate CREATE TABLE commands. + The partitioned table is itself empty. A data row inserted into the + table is routed to a partition based on the value of columns or + expressions in the partition key. If no existing partition matches + the values in the new row, an error will be reported. + + + + Partitioned tables do not support UNIQUE, + PRIMARY KEY, EXCLUDE, or + FOREIGN KEY constraints; however, you can define + these constraints on individual partitions. + + + + When using range partitioning, a NOT NULL constraint + is added to each non-expression column in the partition key. + + + + + LIKE source_table [ like_option ... ] @@ -1368,6 +1471,57 @@ CREATE TABLE employees OF employee_type ( PRIMARY KEY (name), salary WITH OPTIONS DEFAULT 1000 ); + + + + Create a range partitioned table: + +CREATE TABLE measurement ( + city_id int not null, + logdate date not null, + peaktemp int, + unitsales int +) PARTITION BY RANGE (logdate); + + + + Create a list partitioned table: + +CREATE TABLE cities ( + name text not null, + population int, +) PARTITION BY LIST (initcap(name)); + + + + Create partition of a range partitioned table: + +CREATE TABLE measurement_y2016m07 + PARTITION OF measurement ( + unitsales WITH OPTIONS DEFAULT 0 +) FOR VALUES FROM ('2016-07-01') TO ('2016-08-01'); + + + + Create partition of a list partitioned table: + +CREATE TABLE cities_west + PARTITION OF cities ( + CONSTRAINT city_id_nonzero CHECK (city_id != 0) +) FOR VALUES IN ('Los Angeles', 'San Francisco'); + + + + Create partition of a list partitioned table that is itself further + partitioned and then add a partition to it: + +CREATE TABLE cities_west + PARTITION OF cities ( + CONSTRAINT city_id_nonzero CHECK (city_id != 0) +) FOR VALUES IN ('Los Angeles', 'San Francisco') PARTITION BY RANGE (population); + +CREATE TABLE cities_west_10000_to_100000 + PARTITION OF cities_west FOR VALUES FROM (10000) TO (100000); -- cgit v1.2.3