From 15c121b3ed7eb2f290e19533e41ccca734d23574 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 30 Sep 2008 10:52:14 +0000 Subject: Rewrite the FSM. Instead of relying on a fixed-size shared memory segment, the free space information is stored in a dedicated FSM relation fork, with each relation (except for hash indexes; they don't use FSM). This eliminates the max_fsm_relations and max_fsm_pages GUC options; remove any trace of them from the backend, initdb, and documentation. Rewrite contrib/pg_freespacemap to match the new FSM implementation. Also introduce a new variant of the get_raw_page(regclass, int4, int4) function in contrib/pageinspect that let's you to return pages from any relation fork, and a new fsm_page_contents() function to inspect the new FSM pages. --- doc/src/sgml/acronyms.sgml | 4 +- doc/src/sgml/config.sgml | 76 +---------- doc/src/sgml/pageinspect.sgml | 49 ++++++- doc/src/sgml/pgfreespacemap.sgml | 283 +++++++++++---------------------------- doc/src/sgml/ref/vacuum.sgml | 8 +- doc/src/sgml/release.sgml | 5 +- doc/src/sgml/runtime.sgml | 12 +- doc/src/sgml/storage.sgml | 51 ++++++- 8 files changed, 179 insertions(+), 309 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/acronyms.sgml b/doc/src/sgml/acronyms.sgml index c7c5f865d95..82d70de730c 100644 --- a/doc/src/sgml/acronyms.sgml +++ b/doc/src/sgml/acronyms.sgml @@ -1,4 +1,4 @@ - + Acronyms @@ -216,7 +216,7 @@ FSM - Free Space Map + Free Space Map diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 9d33918a3ec..dfb976c4731 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ - + Server Configuration @@ -896,80 +896,6 @@ SET ENABLE_SEQSCAN TO OFF; - - - Free Space Map - - - free space map - - - - These parameters control the size of the shared free space - map (FSM), which tracks the locations of unused space in the database. - An undersized free space map can cause the database to consume - increasing amounts of disk space over time, because free space that - is not in the map cannot be re-used; instead PostgreSQL - will request more disk space from the operating system when it needs - to store new data. - The last few lines displayed by a database-wide VACUUM VERBOSE - command can help in determining if the current settings are adequate. - A NOTICE message is also printed during such an operation - if the current settings are too low. - - - - Increasing these parameters might cause PostgreSQL - to request more System V shared - memory than your operating system's default configuration - allows. See for information on how to - adjust those parameters, if necessary. - - - - - max_fsm_pages (integer) - - max_fsm_pages configuration parameter - - - - Sets the maximum number of disk pages for which free space will - be tracked in the shared free-space map. Six bytes of shared memory - are consumed for each page slot. This setting must be at least - 16 * max_fsm_relations. The default is chosen - by initdb depending on the amount of available memory, - and can range from 20k to 200k pages. - This parameter can only be set at server start. - - - - - - max_fsm_relations (integer) - - max_fsm_relations configuration parameter - - - - Sets the maximum number of relations (tables and indexes) for which - free space will be tracked in the shared free-space map. Roughly - seventy bytes of shared memory are consumed for each slot. - The default is one thousand relations. - This parameter can only be set at server start. - - - - - - - - - See the - command for information on setting this parameter. - - - diff --git a/doc/src/sgml/pageinspect.sgml b/doc/src/sgml/pageinspect.sgml index e398733d013..94249399e10 100644 --- a/doc/src/sgml/pageinspect.sgml +++ b/doc/src/sgml/pageinspect.sgml @@ -1,4 +1,4 @@ - + pageinspect @@ -19,7 +19,7 @@ - get_raw_page(text, int) returns bytea + get_raw_page(relname text, forknum int, blkno int) returns bytea @@ -27,13 +27,28 @@ get_raw_page reads the specified block of the named table and returns a copy as a bytea value. This allows a single time-consistent copy of the block to be obtained. + forknum should be 0 for the main data fork, or 1 for + the FSM. - page_header(bytea) returns record + get_raw_page(relname text, blkno int) returns bytea + + + + + A shorthand of above, for reading from the main fork. Equal to + get_raw_page(relname, 0, blkno) + + + + + + + page_header(page bytea) returns record @@ -63,7 +78,7 @@ test=# SELECT * FROM page_header(get_raw_page('pg_class', 0)); - heap_page_items(bytea) returns setof record + heap_page_items(page bytea) returns setof record @@ -90,7 +105,7 @@ test=# SELECT * FROM heap_page_items(get_raw_page('pg_class', 0)); - bt_metap(text) returns record + bt_metap(relname text) returns record @@ -113,7 +128,7 @@ fastlevel | 0 - bt_page_stats(text, int) returns record + bt_page_stats(relname text, blkno int) returns record @@ -141,7 +156,7 @@ btpo_flags | 3 - bt_page_items(text, int) returns setof record + bt_page_items(relname text, blkno int) returns setof record @@ -164,6 +179,26 @@ test=# SELECT * FROM bt_page_items('pg_cast_oid_index', 1); + + + + fsm_page_contents(page bytea) returns text + + + + + fsm_page_contents shows the internal node structure + of a FSM page. The output is a multi-line string, with one line per + node in the binary tree within the page. Only those nodes that are not + zero are printed. The so-called "next" pointer, which points to the + next slot to be returned from the page, is also printed. + + + See src/backend/storage/freespace/README for more + information on the structure of an FSM page. + + + diff --git a/doc/src/sgml/pgfreespacemap.sgml b/doc/src/sgml/pgfreespacemap.sgml index bc821ead6ba..3d749a953d3 100644 --- a/doc/src/sgml/pgfreespacemap.sgml +++ b/doc/src/sgml/pgfreespacemap.sgml @@ -1,4 +1,4 @@ - + pg_freespacemap @@ -9,183 +9,66 @@ The pg_freespacemap module provides a means for examining the - free space map (FSM). It provides two C functions: - pg_freespacemap_relations and - pg_freespacemap_pages that each return a set of - records, plus two views pg_freespacemap_relations - and pg_freespacemap_pages that wrap the functions - for convenient use. + free space map (FSM). It provides a function called + pg_freespacemap, or two overloaded functions, to be + precise. The functions show the value recorded in the free space map for + a given page, or for all pages in the relation. - By default public access is revoked from the functions and views, just in - case there are security issues lurking. + By default public access is revoked from the functions, just in case + there are security issues lurking. - The <filename>pg_freespacemap</> views + Functions + + + + + pg_freespacemap(rel regclass IN, blkno bigint IN) returns int2 + + + + + Returns the amount of free space on the page of the relation, specified + by blkno, according to the FSM. + (blkno). + + + + + + + + pg_freespacemap(rel regclass IN, blkno OUT int4, avail OUT int2) + + + + + Displays the the amount of free space on each page of the relation, + according to the FSM. A set of (blkno int4, avail int2) + tuples is returned, one tuple for each page in the relation. + + + + - The definitions of the columns exposed by the views are: - - - - <structname>pg_freespacemap_relations</> Columns - - - - - Name - Type - References - Description - - - - - - reltablespace - oid - pg_tablespace.oid - Tablespace OID of the relation - - - reldatabase - oid - pg_database.oid - Database OID of the relation - - - relfilenode - oid - pg_class.relfilenode - Relfilenode of the relation - - - avgrequest - integer - - Moving average of free space requests (NULL for indexes) - - - interestingpages - integer - - Count of pages last reported as containing useful free space - - - storedpages - integer - - Count of pages actually stored in free space map - - - nextpage - integer - - Page index (from 0) to start next search at - - - - -
- - - <structname>pg_freespacemap_pages</> Columns - - - - - Name - Type - References - Description - - - - - - reltablespace - oid - pg_tablespace.oid - Tablespace OID of the relation - - - reldatabase - oid - pg_database.oid - Database OID of the relation - - - relfilenode - oid - pg_class.relfilenode - Relfilenode of the relation - - - relblocknumber - bigint - - Page number within the relation - - - bytes - integer - - Free bytes in the page, or NULL for an index page (see below) - - - - -
- - - For pg_freespacemap_relations, there is one row - for each relation in the free space map. - storedpages is the number of pages actually - stored in the map, while interestingpages is the - number of pages the last VACUUM thought had useful amounts of - free space. - - - - If storedpages is consistently less than - interestingpages then it'd be a good idea to increase - max_fsm_pages. Also, if the number of rows in - pg_freespacemap_relations is close to - max_fsm_relations, then you should consider increasing - max_fsm_relations. - - - - For pg_freespacemap_pages, there is one row for - each page in the free space map. The number of rows for a relation will - match the storedpages column in - pg_freespacemap_relations. + The values stored in the free space map are not exact. They're rounded + to precision of 1/256th of BLCKSZ (32 bytes with default BLCKSZ), and + they're not kept fully up-to-date as tuples are inserted and updated. For indexes, what is tracked is entirely-unused pages, rather than free - space within pages. Therefore, the average request size and free bytes - within a page are not meaningful, and are shown as NULL. + space within pages. Therefore, the values are not meaningful, just + whether a page is full or empty. - Because the map is shared by all the databases, there will normally be - entries for relations not belonging to the current database. This means - that there may not be matching join rows in pg_class for - some rows, or that there could even be incorrect joins. If you are - trying to join against pg_class, it's a good idea to - restrict the join to rows having reldatabase equal to - the current database's OID or zero. - - - - When either of the views is accessed, internal free space map locks are - taken for long enough to copy all the state data that the view will display. - This ensures that the views produce a consistent set of results, while not - blocking normal activity longer than necessary. Nonetheless there - could be some impact on database performance if they are read often. + NOTE: The interface was changed in version 8.4, to reflect the new FSM + implementation introduced in the same version.
@@ -193,45 +76,37 @@ Sample output -regression=# SELECT c.relname, r.avgrequest, r.interestingpages, r.storedpages - FROM pg_freespacemap_relations r INNER JOIN pg_class c - ON r.relfilenode = c.relfilenode AND - r.reldatabase IN (0, (SELECT oid FROM pg_database - WHERE datname = current_database())) - ORDER BY r.storedpages DESC LIMIT 10; - relname | avgrequest | interestingpages | storedpages ----------------------------------+------------+------------------+------------- - onek | 256 | 109 | 109 - pg_attribute | 167 | 93 | 93 - pg_class | 191 | 49 | 49 - pg_attribute_relid_attnam_index | | 48 | 48 - onek2 | 256 | 37 | 37 - pg_depend | 95 | 26 | 26 - pg_type | 199 | 16 | 16 - pg_rewrite | 1011 | 13 | 13 - pg_class_relname_nsp_index | | 10 | 10 - pg_proc | 302 | 8 | 8 -(10 rows) - -regression=# SELECT c.relname, p.relblocknumber, p.bytes - FROM pg_freespacemap_pages p INNER JOIN pg_class c - ON p.relfilenode = c.relfilenode AND - p.reldatabase IN (0, (SELECT oid FROM pg_database - WHERE datname = current_database())) - ORDER BY c.relname LIMIT 10; - relname | relblocknumber | bytes ---------------+----------------+------- - a_star | 0 | 8040 - abstime_tbl | 0 | 7908 - aggtest | 0 | 8008 - altinhoid | 0 | 8128 - altstartwith | 0 | 8128 - arrtest | 0 | 7172 - b_star | 0 | 7976 - box_tbl | 0 | 7912 - bt_f8_heap | 54 | 7728 - bt_i4_heap | 49 | 8008 -(10 rows) +postgres=# SELECT * FROM pg_freespace('foo'); + blkno | avail +-------+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 32 + 4 | 704 + 5 | 704 + 6 | 704 + 7 | 1216 + 8 | 704 + 9 | 704 + 10 | 704 + 11 | 704 + 12 | 704 + 13 | 704 + 14 | 704 + 15 | 704 + 16 | 704 + 17 | 704 + 18 | 704 + 19 | 3648 +(20 rows) + +postgres=# SELECT * FROM pg_freespace('foo', 7); + pg_freespace +-------------- + 1216 +(1 row) + @@ -239,7 +114,9 @@ regression=# SELECT c.relname, p.relblocknumber, p.bytes Author - Mark Kirkwood markir@paradise.net.nz + Original version by Mark Kirkwood markir@paradise.net.nz. + Rewritten in version 8.4 to suit new FSM implementation by Heikki + Linnakangas heikki@enterprisedb.com diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml index 082473c069b..0568fd4eeb8 100644 --- a/doc/src/sgml/ref/vacuum.sgml +++ b/doc/src/sgml/ref/vacuum.sgml @@ -1,5 +1,5 @@ @@ -96,11 +96,7 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] ANALYZE [ VERBOSE - Prints a detailed vacuum activity report for each table. Can be used - to help determine appropriate settings for - , - , and - . + Prints a detailed vacuum activity report for each table. diff --git a/doc/src/sgml/release.sgml b/doc/src/sgml/release.sgml index 6f3daac2b8c..eea942d1a2e 100644 --- a/doc/src/sgml/release.sgml +++ b/doc/src/sgml/release.sgml @@ -1,4 +1,4 @@ - + + Operating System Environment @@ -1117,16 +1117,6 @@ set semsys:seminfo_semmsl=32 8200 (assuming 8 kB XLOG_BLCKSZ) - - - 70 - - - - - 6 - - Fixed space requirements 770 kB diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index e564fd2be9a..51f8a2fe165 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -1,4 +1,4 @@ - + @@ -130,7 +130,12 @@ there. Each table and index is stored in a separate file, named after the table or index's filenode number, which can be found in -pg_class.relfilenode. +pg_class.relfilenode. In addition to the +main file (aka. main fork), a free space map (see +) that stores information about free space +available in the relation, is stored in a file named after the filenode +number, with the the _1 suffix. For example, if the table's filenode number +is 12345, the FSM file is named 12345_1. @@ -367,6 +372,48 @@ comparison table, in which all the HTML pages were cut down to 7 kB to fit.
+ + +Free Space Map + + + Free Space Map + + FSMFree Space Map + + +A Free Space Map is stored with every heap and index relation, except for +hash indexes, to keep track of available space in the relation. It's stored +along the main relation data, in a separate FSM relation fork, named after +relfilenode of the relation, but with a _1 suffix. For example, +if the relfilenode of a relation is 12345, the FSM is stored in a file called +12345_1, in the same directory as the main relation file. + + + +The Free Space Map is organized as a tree of FSM pages. The +bottom level FSM pages stores the free space available on every +heap (or index) page, using one byte to represent each heap page. The upper +levels aggregate information from the lower levels. + + + +Within each FSM page is a binary tree, stored in an array with +one byte per node. Each leaf node represents a heap page, or a lower level +FSM page. In each non-leaf node, the higher of its children's +values is stored. The maximum value in the leaf nodes is therefore stored +at the root. + + + +See src/backend/storage/freespace/README for more details on +how the FSM is structured, and how it's updated and searched. + contrib module can be used to view the +information stored in free space maps. + + + + Database Page Layout -- cgit v1.2.3