From e093dcdd2853911ca1ad710581182dfcb6c78ea3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Aug 2006 04:06:58 +0000 Subject: Add the ability to create indexes 'concurrently', that is, without blocking concurrent writes to the table. Greg Stark, with a little help from Tom Lane. --- doc/src/sgml/catalogs.sgml | 13 ++++- doc/src/sgml/indexam.sgml | 12 ++++- doc/src/sgml/indices.sgml | 13 ++++- doc/src/sgml/mvcc.sgml | 8 +-- doc/src/sgml/ref/create_index.sgml | 104 +++++++++++++++++++++++++++++++++++-- doc/src/sgml/ref/reindex.sgml | 16 +++++- 6 files changed, 155 insertions(+), 11 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 80ed7d829bc..20604d73e48 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1,4 +1,4 @@ - + @@ -2420,6 +2420,17 @@ If true, the table was last clustered on this index + + indisvalid + bool + + If true, the index is currently valid for queries. + False means the index is possibly incomplete: it must still be + inserted into by INSERT/UPDATE operations, but it cannot safely be + used for queries, and if it is unique, the uniqueness shouldn't be + relied on either. + + indkey int2vector diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index 1afa120766e..10ab84278d0 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -1,4 +1,4 @@ - + Index Access Method Interface Definition @@ -648,6 +648,16 @@ amrestrpos (IndexScanDesc scan); + + Furthermore, immediately before raising a uniqueness violation + according to the above rules, the access method must recheck the + liveness of the row being inserted. If it is committed dead then + no error should be raised. (This case cannot occur during the + ordinary scenario of inserting a row that's just been created by + the current transaction. It can happen during + CREATE UNIQUE INDEX CONCURRENTLY, however.) + + We require the index access method to apply these tests itself, which means that it must reach into the heap to check the commit status of diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index 10669c01557..223bb81cfc8 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -1,4 +1,4 @@ - + Indexes @@ -90,6 +90,17 @@ CREATE INDEX test1_id_index ON test1 (id); significantly speed up queries with joins. + + Creating an index on a large table can take a long time. By default, + PostgreSQL allows reads (selects) to occur + on the table in parallel with index creation, but writes (inserts, + updates, deletes) are blocked until the index build is finished. + It is possible to allow writes to occur in parallel with index + creation, but there are several caveats to be aware of — + for more information see . + + After an index is created, the system has to keep it synchronized with the table. This adds overhead to data manipulation operations. diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml index 8ebb8205192..1a1e95db501 100644 --- a/doc/src/sgml/mvcc.sgml +++ b/doc/src/sgml/mvcc.sgml @@ -1,4 +1,4 @@ - + Concurrency Control @@ -622,7 +622,8 @@ SELECT SUM(value) FROM mytab WHERE class = 2; - Acquired by VACUUM (without ). + Acquired by VACUUM (without ) + and by CREATE INDEX CONCURRENTLY. @@ -641,7 +642,8 @@ SELECT SUM(value) FROM mytab WHERE class = 2; - Acquired by CREATE INDEX. + Acquired by CREATE INDEX + (without ). diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 20a89713bf4..be0ca63f2c5 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -1,5 +1,5 @@ @@ -20,7 +20,7 @@ PostgreSQL documentation -CREATE [ UNIQUE ] INDEX name ON table [ USING method ] +CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] name ON table [ USING method ] ( { column | ( expression ) } [ opclass ] [, ...] ) [ WITH ( storage_parameter = value [, ... ] ) ] [ TABLESPACE tablespace ] @@ -110,6 +110,21 @@ CREATE [ UNIQUE ] INDEX name ON + + CONCURRENTLY + + + When this option is used, PostgreSQL will build the + index without taking any locks that prevent concurrent inserts, + updates, or deletes on the table; whereas a standard index build + locks out writes (but not reads) on the table until it's done. + There are several caveats to be aware of when using this option + — see . + + + + name @@ -239,6 +254,82 @@ CREATE [ UNIQUE ] INDEX name ON + + + Building Indexes Concurrently + + + index + building concurrently + + + + Creating an index for a large table can be a long operation. In large data + warehousing applications it can easily take hours or even days to build + indexes. It's important to understand the impact creating indexes has on a + system. + + + + Normally PostgreSQL locks the table to be indexed against + writes and performs the entire index build with a single scan of the + table. Other transactions can still read the table, but if they try to + insert, update, or delete rows in the table they will block until the + index build is finished. + + + + PostgreSQL also supports building indexes without locking + out writes. This method is invoked by specifying the + CONCURRENTLY option of CREATE INDEX. + When this option is used, + PostgreSQL must perform two scans of the table, and in + addition it must wait for all existing transactions to terminate. Thus + this method requires more total work than a standard index build and takes + significantly longer to complete. However, since it allows normal + operations to continue while the index is built, this method is useful for + adding new indexes in a production environment. Of course, the extra CPU + and I/O load imposed by the index creation may slow other operations. + + + + If a problem arises during the second scan of the table, such as a + uniqueness violation in a unique index, the CREATE INDEX + command will fail but leave behind an invalid index. This index + will be ignored for querying purposes because it may be incomplete; + however it will still consume update overhead. The recommended recovery + method in such cases is to drop the index and try again to perform + CREATE INDEX CONCURRENTLY. (Another possibility is to rebuild + the index with REINDEX. However, since REINDEX + does not support concurrent builds, this option is unlikely to seem + attractive.) + + + + Another caveat when building a unique index concurrently is that the + uniqueness constraint is already being enforced against other transactions + when the second table scan begins. This means that constraint violations + could be reported in other queries prior to the index becoming available + for use, or even in cases where the index build eventually fails. Also, + if a failure does occur in the second scan, the invalid index + continues to enforce its uniqueness constraint afterwards. + + + + Concurrent builds of expression indexes and partial indexes are supported. + Errors occurring in the evaluation of these expressions could cause + behavior similar to that described above for unique constraint violations. + + + + Regular index builds permit other regular index builds on the + same table to occur in parallel, but only one concurrent index build + can occur on a table at a time. In both cases, no other types of schema + modification on the table are allowed meanwhile. Another difference + is that a regular CREATE INDEX command can be performed within + a transaction block, but CREATE INDEX CONCURRENTLY cannot. + + @@ -339,15 +430,22 @@ Is this example correct? To create a GiST index on a point attribute so that we can efficiently use box operators on the result of the conversion function: - CREATE INDEX pointloc ON points USING GIST (point2box(location) box_ops); SELECT * FROM points WHERE point2box(points.pointloc) = boxes.box; + --> + + To create an index without locking out writes to the table: + +CREATE INDEX CONCURRENTLY sales_quantity_index ON sales_table (quantity); + + + diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml index dbe10ca7629..9502a0daf57 100644 --- a/doc/src/sgml/ref/reindex.sgml +++ b/doc/src/sgml/ref/reindex.sgml @@ -1,5 +1,5 @@ @@ -30,7 +30,7 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } nam REINDEX rebuilds an index using the data stored in the index's table, replacing the old copy of the index. There are - three main reasons to use REINDEX: + several scenarios in which to use REINDEX: @@ -61,6 +61,18 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } nam for an index, and wish to ensure that the change has taken full effect. + + + + An index build with the CONCURRENTLY option failed, leaving + an invalid index. Such indexes are useless but it can be + convenient to use REINDEX to rebuild them. Note that + REINDEX will not perform a concurrent build. To build the + index without interfering with production you should drop the index and + reissue the CREATE INDEX CONCURRENTLY command. + + + -- cgit v1.2.3