diff options
Diffstat (limited to 'src/backend/storage/freespace/freespace.c')
-rw-r--r-- | src/backend/storage/freespace/freespace.c | 2128 |
1 files changed, 533 insertions, 1595 deletions
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 9373675b8cc..1602ec0cc93 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -8,245 +8,123 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.60 2008/03/10 02:04:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.61 2008/09/30 10:52:13 heikki Exp $ * * * NOTES: * - * The only really interesting aspect of this code is the heuristics for - * deciding how much information we can afford to keep about each relation, - * given that we have a limited amount of workspace in shared memory. - * These currently work as follows: - * - * The number of distinct relations tracked is limited by a configuration - * variable (MaxFSMRelations). When this would be exceeded, we discard the - * least recently used relation. A doubly-linked list with move-to-front - * behavior keeps track of which relation is least recently used. - * - * For each known relation, we track the average request size given to - * GetPageWithFreeSpace() as well as the most recent number of pages reported - * to RecordRelationFreeSpace(). The average request size is not directly - * used in this module, but we expect VACUUM to use it to filter out - * uninteresting amounts of space before calling RecordRelationFreeSpace(). - * The sum of the RRFS page counts is thus the total number of "interesting" - * pages that we would like to track; this is called DesiredFSMPages. - * - * The number of pages actually tracked is limited by a configuration variable - * (MaxFSMPages). When this is less than DesiredFSMPages, each relation - * gets to keep a fraction MaxFSMPages/DesiredFSMPages of its free pages. - * We discard pages with less free space to reach this target. - * - * Actually, our space allocation is done in "chunks" of CHUNKPAGES pages, - * with each relation guaranteed at least one chunk. This reduces thrashing - * of the storage allocations when there are small changes in the RRFS page - * counts from one VACUUM to the next. (XXX it might also be worthwhile to - * impose some kind of moving-average smoothing on the RRFS page counts?) - * - * So the actual arithmetic is: for each relation compute myRequest as the - * number of chunks needed to hold its RRFS page count (not counting the - * first, guaranteed chunk); compute sumRequests as the sum of these values - * over all relations; then for each relation figure its target allocation - * as - * 1 + round(spareChunks * myRequest / sumRequests) - * where spareChunks = totalChunks - numRels is the number of chunks we have - * a choice what to do with. We round off these numbers because truncating - * all of them would waste significant space. But because of roundoff, it's - * possible for the last few relations to get less space than they should; - * the target allocation must be checked against remaining available space. + * Free Space Map keeps track of the amount of free space on pages, and + * allows quickly searching for a page with enough free space. The FSM is + * stored in a dedicated relation fork of all heap relations, and those + * index access methods that need it (see also indexfsm.c). See README for + * more information. * *------------------------------------------------------------------------- */ #include "postgres.h" -#include <limits.h> -#include <math.h> -#include <unistd.h> - -#include "storage/fd.h" +#include "access/htup.h" +#include "access/xlogutils.h" +#include "storage/bufpage.h" +#include "storage/bufmgr.h" #include "storage/freespace.h" +#include "storage/fsm_internals.h" +#include "storage/lmgr.h" #include "storage/lwlock.h" -#include "storage/shmem.h" +#include "storage/smgr.h" +#include "utils/rel.h" +#include "utils/inval.h" +#include "miscadmin.h" - -/*---------- - * During database shutdown, we store the contents of FSM into a disk file, - * which is re-read during startup. This way we don't have a startup - * transient condition where FSM isn't really functioning. +/* + * We use just one byte to store the amount of free space on a page, so we + * divide the amount of free space a page can have into 256 different + * categories. The highest category, 255, represents a page with at least + * MaxFSMRequestSize bytes of free space, and the second highest category + * represents the range from 254 * FSM_CAT_STEP, inclusive, to + * MaxFSMRequestSize, exclusive. * - * The file format is: - * label "FSM\0" - * endian constant 0x01020304 for detecting endianness problems - * version# - * numRels - * -- for each rel, in *reverse* usage order: - * relfilenode - * isIndex - * avgRequest - * interestingPages - * storedPages - * arena data array of storedPages FSMPageData or IndexFSMPageData - *---------- + * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming + * default 8k BLCKSZ, and that MaxFSMRequestSize is 24 bytes, the categories + * look like this + * + * + * Range Category + * 0 - 31 0 + * 32 - 63 1 + * ... ... ... + * 8096 - 8127 253 + * 8128 - 8163 254 + * 8164 - 8192 255 + * + * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize + * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize + * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize + * bytes. If there isn't more than MaxFSMRequestSize bytes of free space on a + * completely empty page, that would mean that we could never satisfy a + * request of exactly MaxFSMRequestSize bytes. */ - -/* Name of FSM cache file (relative to $PGDATA) */ -#define FSM_CACHE_FILENAME "global/pg_fsm.cache" - -/* Fixed values in header */ -#define FSM_CACHE_LABEL "FSM" -#define FSM_CACHE_ENDIAN 0x01020304 -#define FSM_CACHE_VERSION 20030305 - -/* File header layout */ -typedef struct FsmCacheFileHeader -{ - char label[4]; - uint32 endian; - uint32 version; - int32 numRels; -} FsmCacheFileHeader; - -/* Per-relation header */ -typedef struct FsmCacheRelHeader -{ - RelFileNode key; /* hash key (must be first) */ - bool isIndex; /* if true, we store only page numbers */ - uint32 avgRequest; /* moving average of space requests */ - BlockNumber interestingPages; /* # of pages with useful free space */ - int32 storedPages; /* # of pages stored in arena */ -} FsmCacheRelHeader; - -int MaxFSMRelations; /* these are set by guc.c */ -int MaxFSMPages; - -static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */ -static HTAB *FreeSpaceMapRelHash; /* points to (what used to be) - * FSMHeader->relHash */ - - -static void CheckFreeSpaceMapStatistics(int elevel, int numRels, - double needed); -static FSMRelation *lookup_fsm_rel(RelFileNode *rel); -static FSMRelation *create_fsm_rel(RelFileNode *rel); -static void delete_fsm_rel(FSMRelation *fsmrel); -static int realloc_fsm_rel(FSMRelation *fsmrel, BlockNumber interestingPages, - bool isIndex); -static void link_fsm_rel_usage(FSMRelation *fsmrel); -static void unlink_fsm_rel_usage(FSMRelation *fsmrel); -static void link_fsm_rel_storage(FSMRelation *fsmrel); -static void unlink_fsm_rel_storage(FSMRelation *fsmrel); -static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded); -static BlockNumber find_index_free_space(FSMRelation *fsmrel); -static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, - Size spaceAvail); -static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, - int *outPageIndex); -static void compact_fsm_storage(void); -static void push_fsm_rels_after(FSMRelation *afterRel); -static void pack_incoming_pages(FSMPageData *newLocation, int newPages, - FSMPageData *pageSpaces, int nPages); -static void pack_existing_pages(FSMPageData *newLocation, int newPages, - FSMPageData *oldLocation, int oldPages); -static int fsm_calc_request(FSMRelation *fsmrel); -static int fsm_calc_request_unclamped(FSMRelation *fsmrel); -static int fsm_calc_target_allocation(int myRequest); -static int fsm_current_chunks(FSMRelation *fsmrel); -static int fsm_current_allocation(FSMRelation *fsmrel); - +#define FSM_CATEGORIES 256 +#define FSM_CAT_STEP (BLCKSZ / FSM_CATEGORIES) +#define MaxFSMRequestSize MaxHeapTupleSize /* - * Exported routines + * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks, + * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise, + * 216 is the smallest number that satisfies X^4 >= 2^32-1. In practice, + * this means that 4096 bytes is the smallest BLCKSZ that we can get away + * with a 3-level tree, and 512 is the smallest we support. */ +#define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4) +#define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1) +#define FSM_BOTTOM_LEVEL 0 /* - * InitFreeSpaceMap -- Initialize the freespace module. - * - * This must be called once during shared memory initialization. - * It builds the empty free space map table. FreeSpaceLock must also be - * initialized at some point, but is not touched here --- we assume there is - * no need for locking, since only the calling process can be accessing shared - * memory as yet. + * The internal FSM routines work on a logical addressing scheme. Each + * level of the tree can be thought of as a separately addressable file. */ -void -InitFreeSpaceMap(void) +typedef struct { - HASHCTL info; - int nchunks; - bool found; - - /* Create table header */ - FreeSpaceMap = (FSMHeader *) ShmemInitStruct("Free Space Map Header", - sizeof(FSMHeader), - &found); - if (FreeSpaceMap == NULL) - ereport(FATAL, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("insufficient shared memory for free space map"))); - if (!found) - MemSet(FreeSpaceMap, 0, sizeof(FSMHeader)); - - /* Create hashtable for FSMRelations */ - info.keysize = sizeof(RelFileNode); - info.entrysize = sizeof(FSMRelation); - info.hash = tag_hash; - - FreeSpaceMapRelHash = ShmemInitHash("Free Space Map Hash", - MaxFSMRelations + 1, - MaxFSMRelations + 1, - &info, - (HASH_ELEM | HASH_FUNCTION)); - - if (!FreeSpaceMapRelHash) - ereport(FATAL, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("insufficient shared memory for free space map"))); - - if (found) - return; - - - /* Allocate page-storage arena */ - nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1; - /* This check ensures spareChunks will be greater than zero */ - if (nchunks <= MaxFSMRelations) - ereport(FATAL, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("max_fsm_pages must exceed max_fsm_relations * %d", - CHUNKPAGES))); - - FreeSpaceMap->arena = (char *) ShmemAlloc((Size) nchunks * CHUNKBYTES); - if (FreeSpaceMap->arena == NULL) - ereport(FATAL, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("insufficient shared memory for free space map"))); - - FreeSpaceMap->totalChunks = nchunks; - FreeSpaceMap->usedChunks = 0; - FreeSpaceMap->sumRequests = 0; -} + int level; /* level */ + int logpageno; /* page number within the level */ +} FSMAddress; -/* - * Estimate amount of shmem space needed for FSM. - */ -Size -FreeSpaceShmemSize(void) +/* Address of the root page. */ +static const FSMAddress FSM_ROOT_ADDRESS = { FSM_ROOT_LEVEL, 0 }; + +/* XLOG record types */ +#define XLOG_FSM_TRUNCATE 0x00 /* truncate */ + +typedef struct { - Size size; - int nchunks; + RelFileNode node; /* truncated relation */ + BlockNumber nheapblocks; /* new number of blocks in the heap */ +} xl_fsm_truncate; - /* table header */ - size = MAXALIGN(sizeof(FSMHeader)); +/* functions to navigate the tree */ +static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot); +static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot); +static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot); +static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot); +static BlockNumber fsm_logical_to_physical(FSMAddress addr); - /* hash table, including the FSMRelation objects */ - size = add_size(size, hash_estimate_size(MaxFSMRelations + 1, - sizeof(FSMRelation))); +static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend); +static void fsm_extend(Relation rel, BlockNumber nfsmblocks); - /* page-storage arena */ - nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1; - size = add_size(size, mul_size(nchunks, CHUNKBYTES)); +/* functions to convert amount of free space to a FSM category */ +static uint8 fsm_space_avail_to_cat(Size avail); +static uint8 fsm_space_needed_to_cat(Size needed); +static Size fsm_space_cat_to_avail(uint8 cat); + +/* workhorse functions for various operations */ +static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot, + uint8 newValue, uint8 minValue); +static BlockNumber fsm_search(Relation rel, uint8 min_cat); +static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof); - return size; -} + +/******** Public API ********/ /* * GetPageWithFreeSpace - try to find a page in the given relation with @@ -262,1608 +140,668 @@ FreeSpaceShmemSize(void) * extend the relation. */ BlockNumber -GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) +GetPageWithFreeSpace(Relation rel, Size spaceNeeded) { - FSMRelation *fsmrel; - BlockNumber freepage; - - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); - - /* - * We always add a rel to the hashtable when it is inquired about. - */ - fsmrel = create_fsm_rel(rel); - - /* - * Update the moving average of space requests. This code implements an - * exponential moving average with an equivalent period of about 63 - * requests. Ignore silly requests, however, to ensure that the average - * stays sane. - */ - if (spaceNeeded > 0 && spaceNeeded < BLCKSZ) - { - int cur_avg = (int) fsmrel->avgRequest; - - cur_avg += ((int) spaceNeeded - cur_avg) / 32; - fsmrel->avgRequest = (Size) cur_avg; - } - freepage = find_free_space(fsmrel, spaceNeeded); - LWLockRelease(FreeSpaceLock); - return freepage; + uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded); + return fsm_search(rel, min_cat); } /* * RecordAndGetPageWithFreeSpace - update info about a page and try again. * - * We provide this combo form, instead of a separate Record operation, - * to save one lock and hash table lookup cycle. + * We provide this combo form to save some locking overhead, compared to + * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's + * also some effort to return a page close to the old page; if there's a + * page with enough free space on the same FSM page where the old one page + * is located, it is preferred. */ BlockNumber -RecordAndGetPageWithFreeSpace(RelFileNode *rel, - BlockNumber oldPage, - Size oldSpaceAvail, - Size spaceNeeded) +RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, + Size oldSpaceAvail, Size spaceNeeded) { - FSMRelation *fsmrel; - BlockNumber freepage; + int old_cat = fsm_space_avail_to_cat(oldSpaceAvail); + int search_cat = fsm_space_needed_to_cat(spaceNeeded); + FSMAddress addr; + uint16 slot; + int search_slot; - /* Sanity check: ensure spaceAvail will fit into OffsetNumber */ - AssertArg(oldSpaceAvail < BLCKSZ); + /* Get the location of the FSM byte representing the heap block */ + addr = fsm_get_location(oldPage, &slot); - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); + search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat); /* - * We always add a rel to the hashtable when it is inquired about. + * If fsm_set_and_search found a suitable new block, return that. + * Otherwise, search as usual. */ - fsmrel = create_fsm_rel(rel); - - /* Do the Record */ - fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail); - - /* - * Update the moving average of space requests, same as in - * GetPageWithFreeSpace. - */ - if (spaceNeeded > 0 && spaceNeeded < BLCKSZ) - { - int cur_avg = (int) fsmrel->avgRequest; - - cur_avg += ((int) spaceNeeded - cur_avg) / 32; - fsmrel->avgRequest = (Size) cur_avg; - } - /* Do the Get */ - freepage = find_free_space(fsmrel, spaceNeeded); - LWLockRelease(FreeSpaceLock); - return freepage; -} - -/* - * GetAvgFSMRequestSize - get average FSM request size for a relation. - * - * If the relation is not known to FSM, return a default value. - */ -Size -GetAvgFSMRequestSize(RelFileNode *rel) -{ - Size result; - FSMRelation *fsmrel; - - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); - fsmrel = lookup_fsm_rel(rel); - if (fsmrel) - result = fsmrel->avgRequest; + if (search_slot != -1) + return fsm_get_heap_blk(addr, search_slot); else - result = INITIAL_AVERAGE; - LWLockRelease(FreeSpaceLock); - return result; + return fsm_search(rel, search_cat); } /* - * RecordRelationFreeSpace - record available-space info about a relation. - * - * Any pre-existing info about the relation is assumed obsolete and discarded. - * - * interestingPages is the total number of pages in the relation that have - * at least threshold free space; nPages is the number actually reported in - * pageSpaces[] (may be less --- in particular, callers typically clamp their - * space usage to MaxFSMPages). + * RecordPageWithFreeSpace - update info about a page. * - * The given pageSpaces[] array must be sorted in order by blkno. Note that - * the FSM is at liberty to discard some or all of the data. + * Note that if the new spaceAvail value is higher than the old value stored + * in the FSM, the space might not become visible to searchers until the next + * FreeSpaceMapVacuum call, which updates the upper level pages. */ void -RecordRelationFreeSpace(RelFileNode *rel, - BlockNumber interestingPages, - int nPages, - FSMPageData *pageSpaces) +RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail) { - FSMRelation *fsmrel; + int new_cat = fsm_space_avail_to_cat(spaceAvail); + FSMAddress addr; + uint16 slot; - /* Limit nPages to something sane */ - if (nPages < 0) - nPages = 0; - else if (nPages > MaxFSMPages) - nPages = MaxFSMPages; + /* Get the location of the FSM byte representing the heap block */ + addr = fsm_get_location(heapBlk, &slot); - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); - - /* - * Note we don't record info about a relation unless there's already an - * FSM entry for it, implying someone has done GetPageWithFreeSpace for - * it. Inactive rels thus will not clutter the map simply by being - * vacuumed. - */ - fsmrel = lookup_fsm_rel(rel); - if (fsmrel) - { - int curAlloc; - int curAllocPages; - FSMPageData *newLocation; - - curAlloc = realloc_fsm_rel(fsmrel, interestingPages, false); - curAllocPages = curAlloc * CHUNKPAGES; - - /* - * If the data fits in our current allocation, just copy it; otherwise - * must compress. - */ - newLocation = (FSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - if (nPages <= curAllocPages) - { - int i; - - for (i = 0; i < nPages; i++) - { - BlockNumber page = FSMPageGetPageNum(&pageSpaces[i]); - - /* Check caller provides sorted data */ - if (i > 0 && page <= FSMPageGetPageNum(&pageSpaces[i - 1])) - elog(ERROR, "free-space data is not in page order"); - *newLocation = pageSpaces[i]; - newLocation++; - } - fsmrel->storedPages = nPages; - } - else - { - pack_incoming_pages(newLocation, curAllocPages, - pageSpaces, nPages); - fsmrel->storedPages = curAllocPages; - } - } - LWLockRelease(FreeSpaceLock); + fsm_set_and_search(rel, addr, slot, new_cat, 0); } /* - * GetFreeIndexPage - like GetPageWithFreeSpace, but for indexes + * GetRecordedFreePage - return the amount of free space on a particular page, + * according to the FSM. */ -BlockNumber -GetFreeIndexPage(RelFileNode *rel) +Size +GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk) { - FSMRelation *fsmrel; - BlockNumber freepage; + FSMAddress addr; + uint16 slot; + Buffer buf; + uint8 cat; - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); + /* Get the location of the FSM byte representing the heap block */ + addr = fsm_get_location(heapBlk, &slot); - /* - * We always add a rel to the hashtable when it is inquired about. - */ - fsmrel = create_fsm_rel(rel); + buf = fsm_readbuf(rel, addr, false); + if (!BufferIsValid(buf)) + return 0; + cat = fsm_get_avail(BufferGetPage(buf), slot); + ReleaseBuffer(buf); - freepage = find_index_free_space(fsmrel); - LWLockRelease(FreeSpaceLock); - return freepage; + return fsm_space_cat_to_avail(cat); } /* - * RecordIndexFreeSpace - like RecordRelationFreeSpace, but for indexes + * FreeSpaceMapTruncateRel - adjust for truncation of a relation. + * + * The caller must hold AccessExclusiveLock on the relation, to ensure + * that other backends receive the relcache invalidation event that this + * function sends, before accessing the FSM again. + * + * nblocks is the new size of the heap. */ void -RecordIndexFreeSpace(RelFileNode *rel, - BlockNumber interestingPages, - int nPages, - BlockNumber *pages) +FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks) { - FSMRelation *fsmrel; + BlockNumber new_nfsmblocks; + FSMAddress first_removed_address; + uint16 first_removed_slot; + Buffer buf; - /* Limit nPages to something sane */ - if (nPages < 0) - nPages = 0; - else if (nPages > MaxFSMPages) - nPages = MaxFSMPages; + RelationOpenSmgr(rel); - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); + /* Get the location in the FSM of the first removed heap block */ + first_removed_address = fsm_get_location(nblocks, &first_removed_slot); /* - * Note we don't record info about a relation unless there's already an - * FSM entry for it, implying someone has done GetFreeIndexPage for it. - * Inactive rels thus will not clutter the map simply by being vacuumed. + * Zero out the tail of the last remaining FSM page. If the slot + * representing the first removed heap block is at a page boundary, as + * the first slot on the FSM page that first_removed_address points to, + * we can just truncate that page altogether. */ - fsmrel = lookup_fsm_rel(rel); - if (fsmrel) + if (first_removed_slot > 0) { - int curAlloc; - int curAllocPages; - int i; - IndexFSMPageData *newLocation; + buf = fsm_readbuf(rel, first_removed_address, false); + if (!BufferIsValid(buf)) + return; /* nothing to do; the FSM was already smaller */ + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + fsm_truncate_avail(BufferGetPage(buf), first_removed_slot); + MarkBufferDirty(buf); + UnlockReleaseBuffer(buf); - curAlloc = realloc_fsm_rel(fsmrel, interestingPages, true); - curAllocPages = curAlloc * INDEXCHUNKPAGES; + new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1; + } + else + { + new_nfsmblocks = fsm_logical_to_physical(first_removed_address); + if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks) + return; /* nothing to do; the FSM was already smaller */ + } - /* - * If the data fits in our current allocation, just copy it; otherwise - * must compress. But compression is easy: we merely forget extra - * pages. - */ - newLocation = (IndexFSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - if (nPages > curAllocPages) - nPages = curAllocPages; + /* Truncate the unused FSM pages */ + smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks, rel->rd_istemp); - for (i = 0; i < nPages; i++) - { - BlockNumber page = pages[i]; + /* + * FSM truncations are WAL-logged, because we must never return a block + * that doesn't exist in the heap, not even if we crash before the FSM + * truncation has made it to disk. smgrtruncate() writes its own WAL + * record, but that's not enough to zero out the last remaining FSM page. + * (if we didn't need to zero out anything above, we can skip this) + */ + if (!rel->rd_istemp && !InRecovery && first_removed_slot != 0) + { + xl_fsm_truncate xlrec; + XLogRecData rdata; + XLogRecPtr recptr; - /* Check caller provides sorted data */ - if (i > 0 && page <= pages[i - 1]) - elog(ERROR, "free-space data is not in page order"); - IndexFSMPageSetPageNum(newLocation, page); - newLocation++; - } - fsmrel->storedPages = nPages; - } - LWLockRelease(FreeSpaceLock); -} + xlrec.node = rel->rd_node; + xlrec.nheapblocks = nblocks; -/* - * FreeSpaceMapTruncateRel - adjust for truncation of a relation. - * - * We need to delete any stored data past the new relation length, so that - * we don't bogusly return removed block numbers. - */ -void -FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks) -{ - FSMRelation *fsmrel; + rdata.data = (char *) &xlrec; + rdata.len = sizeof(xl_fsm_truncate); + rdata.buffer = InvalidBuffer; + rdata.next = NULL; - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); - fsmrel = lookup_fsm_rel(rel); - if (fsmrel) - { - int pageIndex; + recptr = XLogInsert(RM_FREESPACE_ID, XLOG_FSM_TRUNCATE, &rdata); - /* Use lookup to locate first entry >= nblocks */ - (void) lookup_fsm_page_entry(fsmrel, nblocks, &pageIndex); - /* Delete all such entries */ - fsmrel->storedPages = pageIndex; - /* XXX should we adjust rel's interestingPages and sumRequests? */ + /* + * Flush, because otherwise the truncation of the main relation + * might hit the disk before the WAL record of truncating the + * FSM is flushed. If we crashed during that window, we'd be + * left with a truncated heap, without a truncated FSM. + */ + XLogFlush(recptr); } - LWLockRelease(FreeSpaceLock); + + /* + * Need to invalidate the relcache entry, because rd_fsm_nblocks_cache + * seen by other backends is no longer valid. + */ + if (!InRecovery) + CacheInvalidateRelcache(rel); + rel->rd_fsm_nblocks_cache = new_nfsmblocks; } /* - * FreeSpaceMapForgetRel - forget all about a relation. - * - * This is called when a relation is deleted. Although we could just let - * the rel age out of the map, it's better to reclaim and reuse the space - * sooner. + * FreeSpaceMapVacuum - scan and fix any inconsistencies in the FSM */ void -FreeSpaceMapForgetRel(RelFileNode *rel) +FreeSpaceMapVacuum(Relation rel) { - FSMRelation *fsmrel; + bool dummy; - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); - fsmrel = lookup_fsm_rel(rel); - if (fsmrel) - delete_fsm_rel(fsmrel); - LWLockRelease(FreeSpaceLock); + /* + * Traverse the tree in depth-first order. The tree is stored physically + * in depth-first order, so this should be pretty I/O efficient. + */ + fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy); } +/******** Internal routines ********/ + /* - * FreeSpaceMapForgetDatabase - forget all relations of a database. - * - * This is called during DROP DATABASE. As above, might as well reclaim - * map space sooner instead of later. + * Return category corresponding x bytes of free space */ -void -FreeSpaceMapForgetDatabase(Oid dbid) +static uint8 +fsm_space_avail_to_cat(Size avail) { - FSMRelation *fsmrel, - *nextrel; + int cat; - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); - for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = nextrel) - { - nextrel = fsmrel->nextUsage; /* in case we delete it */ - if (fsmrel->key.dbNode == dbid) - delete_fsm_rel(fsmrel); - } - LWLockRelease(FreeSpaceLock); -} + Assert(avail < BLCKSZ); -/* - * PrintFreeSpaceMapStatistics - print statistics about FSM contents - * - * The info is sent to ereport() with the specified message level. This is - * intended for use during VACUUM. - */ -void -PrintFreeSpaceMapStatistics(int elevel) -{ - FSMRelation *fsmrel; - int storedPages = 0; - double sumRequests = 0; - int numRels; - double needed; + if (avail >= MaxFSMRequestSize) + return 255; - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); + cat = avail / FSM_CAT_STEP; /* - * Count total space actually used, as well as the unclamped request total + * The highest category, 255, is reserved for MaxFSMRequestSize bytes or + * more. */ - for (fsmrel = FreeSpaceMap->firstRel; - fsmrel != NULL; - fsmrel = fsmrel->nextPhysical) - { - storedPages += fsmrel->storedPages; - sumRequests += fsm_calc_request_unclamped(fsmrel); - } + if (cat > 254) + cat = 254; - /* Copy other stats before dropping lock */ - numRels = FreeSpaceMap->numRels; - LWLockRelease(FreeSpaceLock); - - /* Convert stats to actual number of page slots needed */ - needed = (sumRequests + numRels) * CHUNKPAGES; - - ereport(elevel, - (errmsg("free space map contains %d pages in %d relations", - storedPages, numRels), - errdetail("A total of %.0f page slots are in use (including overhead).\n" - "%.0f page slots are required to track all free space.\n" - "Current limits are: %d page slots, %d relations, using %.0f kB.", - Min(needed, MaxFSMPages), - needed, - MaxFSMPages, MaxFSMRelations, - (double) FreeSpaceShmemSize() / 1024.0))); - - CheckFreeSpaceMapStatistics(NOTICE, numRels, needed); - /* Print to server logs too because is deals with a config variable. */ - CheckFreeSpaceMapStatistics(LOG, numRels, needed); + return (uint8) cat; } -static void -CheckFreeSpaceMapStatistics(int elevel, int numRels, double needed) +/* + * Return the lower bound of the range of free space represented by given + * category. + */ +static Size +fsm_space_cat_to_avail(uint8 cat) { - if (numRels == MaxFSMRelations) - ereport(elevel, - (errmsg("max_fsm_relations(%d) equals the number of relations checked", - MaxFSMRelations), - errhint("You have at least %d relations. " - "Consider increasing the configuration parameter \"max_fsm_relations\".", - numRels))); - else if (needed > MaxFSMPages) - ereport(elevel, - (errmsg("number of page slots needed (%.0f) exceeds max_fsm_pages (%d)", - needed, MaxFSMPages), - errhint("Consider increasing the configuration parameter \"max_fsm_pages\" " - "to a value over %.0f.", needed))); + /* The highest category represents exactly MaxFSMRequestSize bytes. */ + if (cat == 255) + return MaxFSMRequestSize; + else + return cat * FSM_CAT_STEP; } /* - * DumpFreeSpaceMap - dump contents of FSM into a disk file for later reload - * - * This is expected to be called during database shutdown, after updates to - * the FSM have stopped. We lock the FreeSpaceLock but that's purely pro - * forma --- if anyone else is still accessing FSM, there's a problem. + * Which category does a page need to have, to accommodate x bytes of data? + * While fsm_size_to_avail_cat() rounds down, this needs to round up. */ -void -DumpFreeSpaceMap(int code, Datum arg) +static uint8 +fsm_space_needed_to_cat(Size needed) { - FILE *fp; - FsmCacheFileHeader header; - FSMRelation *fsmrel; - - /* Try to create file */ - unlink(FSM_CACHE_FILENAME); /* in case it exists w/wrong permissions */ - - fp = AllocateFile(FSM_CACHE_FILENAME, PG_BINARY_W); - if (fp == NULL) - { - elog(LOG, "could not write \"%s\": %m", FSM_CACHE_FILENAME); - return; - } - - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); - - /* Write file header */ - MemSet(&header, 0, sizeof(header)); - strcpy(header.label, FSM_CACHE_LABEL); - header.endian = FSM_CACHE_ENDIAN; - header.version = FSM_CACHE_VERSION; - header.numRels = FreeSpaceMap->numRels; - if (fwrite(&header, 1, sizeof(header), fp) != sizeof(header)) - goto write_failed; - - /* For each relation, in order from least to most recently used... */ - for (fsmrel = FreeSpaceMap->usageListTail; - fsmrel != NULL; - fsmrel = fsmrel->priorUsage) - { - FsmCacheRelHeader relheader; - int nPages; - - /* Write relation header */ - MemSet(&relheader, 0, sizeof(relheader)); - relheader.key = fsmrel->key; - relheader.isIndex = fsmrel->isIndex; - relheader.avgRequest = fsmrel->avgRequest; - relheader.interestingPages = fsmrel->interestingPages; - relheader.storedPages = fsmrel->storedPages; - if (fwrite(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader)) - goto write_failed; - - /* Write the per-page data directly from the arena */ - nPages = fsmrel->storedPages; - if (nPages > 0) - { - Size len; - char *data; - - if (fsmrel->isIndex) - len = nPages * sizeof(IndexFSMPageData); - else - len = nPages * sizeof(FSMPageData); - data = (char *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - if (fwrite(data, 1, len, fp) != len) - goto write_failed; - } - } - - /* Clean up */ - LWLockRelease(FreeSpaceLock); - - if (FreeFile(fp)) - { - elog(LOG, "could not write \"%s\": %m", FSM_CACHE_FILENAME); - /* Remove busted cache file */ - unlink(FSM_CACHE_FILENAME); - } + int cat; - return; + /* Can't ask for more space than the highest category represents */ + if (needed > MaxFSMRequestSize) + elog(ERROR, "invalid FSM request size %d", needed); -write_failed: - elog(LOG, "could not write \"%s\": %m", FSM_CACHE_FILENAME); + if (needed == 0) + return 1; - /* Clean up */ - LWLockRelease(FreeSpaceLock); + cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP; - FreeFile(fp); + if (cat > 255) + cat = 255; - /* Remove busted cache file */ - unlink(FSM_CACHE_FILENAME); + return (uint8) cat; } /* - * LoadFreeSpaceMap - load contents of FSM from a disk file - * - * This is expected to be called during database startup, before any FSM - * updates begin. We lock the FreeSpaceLock but that's purely pro - * forma --- if anyone else is accessing FSM yet, there's a problem. - * - * Notes: no complaint is issued if no cache file is found. If the file is - * found, it is deleted after reading. Thus, if we crash without a clean - * shutdown, the next cycle of life starts with no FSM data. To do otherwise, - * we'd need to do significantly more validation in this routine, because of - * the likelihood that what is in the dump file would be out-of-date, eg - * there might be entries for deleted or truncated rels. + * Returns the physical block number an FSM page */ -void -LoadFreeSpaceMap(void) +static BlockNumber +fsm_logical_to_physical(FSMAddress addr) { - FILE *fp; - FsmCacheFileHeader header; - int relno; - - /* Try to open file */ - fp = AllocateFile(FSM_CACHE_FILENAME, PG_BINARY_R); - if (fp == NULL) - { - if (errno != ENOENT) - elog(LOG, "could not read \"%s\": %m", FSM_CACHE_FILENAME); - return; - } + BlockNumber pages; + int leafno; + int l; - LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); + /* + * Calculate the logical page number of the first leaf page below the + * given page. + */ + leafno = addr.logpageno; + for (l = 0; l < addr.level; l++) + leafno *= SlotsPerFSMPage; - /* Read and verify file header */ - if (fread(&header, 1, sizeof(header), fp) != sizeof(header) || - strcmp(header.label, FSM_CACHE_LABEL) != 0 || - header.endian != FSM_CACHE_ENDIAN || - header.version != FSM_CACHE_VERSION || - header.numRels < 0) + /* Count upper level nodes required to address the leaf page */ + pages = 0; + for (l = 0; l < FSM_TREE_DEPTH; l++) { - elog(LOG, "bogus file header in \"%s\"", FSM_CACHE_FILENAME); - goto read_failed; + pages += leafno + 1; + leafno /= SlotsPerFSMPage; } - /* For each relation, in order from least to most recently used... */ - for (relno = 0; relno < header.numRels; relno++) - { - FsmCacheRelHeader relheader; - Size len; - char *data; - FSMRelation *fsmrel; - int nPages; - int curAlloc; - int curAllocPages; - - /* Read and verify relation header, as best we can */ - if (fread(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader) || - (relheader.isIndex != false && relheader.isIndex != true) || - relheader.avgRequest >= BLCKSZ || - relheader.storedPages < 0) - { - elog(LOG, "bogus rel header in \"%s\"", FSM_CACHE_FILENAME); - goto read_failed; - } - - /* Read the per-page data */ - nPages = relheader.storedPages; - if (relheader.isIndex) - len = nPages * sizeof(IndexFSMPageData); - else - len = nPages * sizeof(FSMPageData); - data = (char *) palloc(len); - if (fread(data, 1, len, fp) != len) - { - elog(LOG, "premature EOF in \"%s\"", FSM_CACHE_FILENAME); - pfree(data); - goto read_failed; - } - - /* - * Okay, create the FSM entry and insert data into it. Since the rels - * were stored in reverse usage order, at the end of the loop they - * will be correctly usage-ordered in memory; and if MaxFSMRelations - * is less than it used to be, we will correctly drop the least - * recently used ones. - */ - fsmrel = create_fsm_rel(&relheader.key); - fsmrel->avgRequest = relheader.avgRequest; - - curAlloc = realloc_fsm_rel(fsmrel, relheader.interestingPages, - relheader.isIndex); - if (relheader.isIndex) - { - IndexFSMPageData *newLocation; - - curAllocPages = curAlloc * INDEXCHUNKPAGES; - - /* - * If the data fits in our current allocation, just copy it; - * otherwise must compress. But compression is easy: we merely - * forget extra pages. - */ - newLocation = (IndexFSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - if (nPages > curAllocPages) - nPages = curAllocPages; - memcpy(newLocation, data, nPages * sizeof(IndexFSMPageData)); - fsmrel->storedPages = nPages; - } - else - { - FSMPageData *newLocation; - - curAllocPages = curAlloc * CHUNKPAGES; - - /* - * If the data fits in our current allocation, just copy it; - * otherwise must compress. - */ - newLocation = (FSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - if (nPages <= curAllocPages) - { - memcpy(newLocation, data, nPages * sizeof(FSMPageData)); - fsmrel->storedPages = nPages; - } - else - { - pack_existing_pages(newLocation, curAllocPages, - (FSMPageData *) data, nPages); - fsmrel->storedPages = curAllocPages; - } - } - - pfree(data); - } - -read_failed: - - /* Clean up */ - LWLockRelease(FreeSpaceLock); - - FreeFile(fp); + /* + * If the page we were asked for wasn't at the bottom level, subtract + * the additional lower level pages we counted above. + */ + pages -= addr.level; - /* Remove cache file before it can become stale; see notes above */ - unlink(FSM_CACHE_FILENAME); + /* Turn the page count into 0-based block number */ + return pages - 1; } - -/* - * Internal routines. These all assume the caller holds the FreeSpaceLock. - */ - /* - * Lookup a relation in the hash table. If not present, return NULL. - * - * The relation's position in the LRU list is not changed. + * Return the FSM location corresponding to given heap block. */ -static FSMRelation * -lookup_fsm_rel(RelFileNode *rel) +static FSMAddress +fsm_get_location(BlockNumber heapblk, uint16 *slot) { - FSMRelation *fsmrel; + FSMAddress addr; - fsmrel = (FSMRelation *) hash_search(FreeSpaceMapRelHash, - (void *) rel, - HASH_FIND, - NULL); - if (!fsmrel) - return NULL; + addr.level = FSM_BOTTOM_LEVEL; + addr.logpageno = heapblk / SlotsPerFSMPage; + *slot = heapblk % SlotsPerFSMPage; - return fsmrel; + return addr; } /* - * Lookup a relation in the hash table, creating an entry if not present. - * - * On successful lookup, the relation is moved to the front of the LRU list. + * Return the heap block number corresponding to given location in the FSM. */ -static FSMRelation * -create_fsm_rel(RelFileNode *rel) +static BlockNumber +fsm_get_heap_blk(FSMAddress addr, uint16 slot) { - FSMRelation *fsmrel; - bool found; - - fsmrel = (FSMRelation *) hash_search(FreeSpaceMapRelHash, - (void *) rel, - HASH_ENTER, - &found); - - if (!found) - { - /* New hashtable entry, initialize it (hash_search set the key) */ - fsmrel->isIndex = false; /* until we learn different */ - fsmrel->avgRequest = INITIAL_AVERAGE; - fsmrel->interestingPages = 0; - fsmrel->firstChunk = -1; /* no space allocated */ - fsmrel->storedPages = 0; - fsmrel->nextPage = 0; - - /* Discard lowest-priority existing rel, if we are over limit */ - if (FreeSpaceMap->numRels >= MaxFSMRelations) - delete_fsm_rel(FreeSpaceMap->usageListTail); - - /* Add new entry at front of LRU list */ - link_fsm_rel_usage(fsmrel); - fsmrel->nextPhysical = NULL; /* not in physical-storage list */ - fsmrel->priorPhysical = NULL; - FreeSpaceMap->numRels++; - /* sumRequests is unchanged because request must be zero */ - } - else - { - /* Existing entry, move to front of LRU list */ - if (fsmrel->priorUsage != NULL) - { - unlink_fsm_rel_usage(fsmrel); - link_fsm_rel_usage(fsmrel); - } - } - - return fsmrel; + Assert(addr.level == FSM_BOTTOM_LEVEL); + return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot; } /* - * Remove an existing FSMRelation entry. + * Given a logical address of a child page, get the logical page number of + * the parent, and the slot within the parent corresponding to the child. */ -static void -delete_fsm_rel(FSMRelation *fsmrel) +static FSMAddress +fsm_get_parent(FSMAddress child, uint16 *slot) { - FSMRelation *result; - - FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel); - unlink_fsm_rel_usage(fsmrel); - unlink_fsm_rel_storage(fsmrel); - FreeSpaceMap->numRels--; - result = (FSMRelation *) hash_search(FreeSpaceMapRelHash, - (void *) &(fsmrel->key), - HASH_REMOVE, - NULL); - if (!result) - elog(ERROR, "FreeSpaceMap hashtable corrupted"); -} + FSMAddress parent; -/* - * Reallocate space for a FSMRelation. - * - * This is shared code for RecordRelationFreeSpace and RecordIndexFreeSpace. - * The return value is the actual new allocation, in chunks. - */ -static int -realloc_fsm_rel(FSMRelation *fsmrel, BlockNumber interestingPages, - bool isIndex) -{ - int myRequest; - int myAlloc; - int curAlloc; + Assert(child.level < FSM_ROOT_LEVEL); - /* - * Delete any existing entries, and update request status. - */ - fsmrel->storedPages = 0; - FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel); - fsmrel->interestingPages = interestingPages; - fsmrel->isIndex = isIndex; - myRequest = fsm_calc_request(fsmrel); - FreeSpaceMap->sumRequests += myRequest; - myAlloc = fsm_calc_target_allocation(myRequest); + parent.level = child.level + 1; + parent.logpageno = child.logpageno / SlotsPerFSMPage; + *slot = child.logpageno % SlotsPerFSMPage; - /* - * Need to reallocate space if (a) my target allocation is more than my - * current allocation, AND (b) my actual immediate need (myRequest+1 - * chunks) is more than my current allocation. Otherwise just store the - * new data in-place. - */ - curAlloc = fsm_current_allocation(fsmrel); - if (myAlloc > curAlloc && (myRequest + 1) > curAlloc && interestingPages > 0) - { - /* Remove entry from storage list, and compact */ - unlink_fsm_rel_storage(fsmrel); - compact_fsm_storage(); - /* Reattach to end of storage list */ - link_fsm_rel_storage(fsmrel); - /* And allocate storage */ - fsmrel->firstChunk = FreeSpaceMap->usedChunks; - FreeSpaceMap->usedChunks += myAlloc; - curAlloc = myAlloc; - /* Watch out for roundoff error */ - if (FreeSpaceMap->usedChunks > FreeSpaceMap->totalChunks) - { - FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks; - curAlloc = FreeSpaceMap->totalChunks - fsmrel->firstChunk; - } - } - return curAlloc; + return parent; } /* - * Link a FSMRelation into the LRU list (always at the head). + * Given a logical address of a parent page, and a slot number get the + * logical address of the corresponding child page. */ -static void -link_fsm_rel_usage(FSMRelation *fsmrel) +static FSMAddress +fsm_get_child(FSMAddress parent, uint16 slot) { - fsmrel->priorUsage = NULL; - fsmrel->nextUsage = FreeSpaceMap->usageList; - FreeSpaceMap->usageList = fsmrel; - if (fsmrel->nextUsage != NULL) - fsmrel->nextUsage->priorUsage = fsmrel; - else - FreeSpaceMap->usageListTail = fsmrel; -} + FSMAddress child; -/* - * Delink a FSMRelation from the LRU list. - */ -static void -unlink_fsm_rel_usage(FSMRelation *fsmrel) -{ - if (fsmrel->priorUsage != NULL) - fsmrel->priorUsage->nextUsage = fsmrel->nextUsage; - else - FreeSpaceMap->usageList = fsmrel->nextUsage; - if (fsmrel->nextUsage != NULL) - fsmrel->nextUsage->priorUsage = fsmrel->priorUsage; - else - FreeSpaceMap->usageListTail = fsmrel->priorUsage; + Assert(parent.level > FSM_BOTTOM_LEVEL); - /* - * We don't bother resetting fsmrel's links, since it's about to be - * deleted or relinked at the head. - */ -} + child.level = parent.level - 1; + child.logpageno = parent.logpageno * SlotsPerFSMPage + slot; -/* - * Link a FSMRelation into the storage-order list (always at the tail). - */ -static void -link_fsm_rel_storage(FSMRelation *fsmrel) -{ - fsmrel->nextPhysical = NULL; - fsmrel->priorPhysical = FreeSpaceMap->lastRel; - if (FreeSpaceMap->lastRel != NULL) - FreeSpaceMap->lastRel->nextPhysical = fsmrel; - else - FreeSpaceMap->firstRel = fsmrel; - FreeSpaceMap->lastRel = fsmrel; + return child; } /* - * Delink a FSMRelation from the storage-order list, if it's in it. + * Read a FSM page. + * + * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is + * true, the FSM file is extended. */ -static void -unlink_fsm_rel_storage(FSMRelation *fsmrel) +static Buffer +fsm_readbuf(Relation rel, FSMAddress addr, bool extend) { - if (fsmrel->priorPhysical != NULL || FreeSpaceMap->firstRel == fsmrel) + BlockNumber blkno = fsm_logical_to_physical(addr); + + RelationOpenSmgr(rel); + + if (rel->rd_fsm_nblocks_cache == InvalidBlockNumber || + rel->rd_fsm_nblocks_cache <= blkno) + rel->rd_fsm_nblocks_cache = smgrnblocks(rel->rd_smgr, FSM_FORKNUM); + + if (blkno >= rel->rd_fsm_nblocks_cache) { - if (fsmrel->priorPhysical != NULL) - fsmrel->priorPhysical->nextPhysical = fsmrel->nextPhysical; + if (extend) + fsm_extend(rel, blkno + 1); else - FreeSpaceMap->firstRel = fsmrel->nextPhysical; - if (fsmrel->nextPhysical != NULL) - fsmrel->nextPhysical->priorPhysical = fsmrel->priorPhysical; - else - FreeSpaceMap->lastRel = fsmrel->priorPhysical; + return InvalidBuffer; } - /* mark as not in list, since we may not put it back immediately */ - fsmrel->nextPhysical = NULL; - fsmrel->priorPhysical = NULL; - /* Also mark it as having no storage */ - fsmrel->firstChunk = -1; - fsmrel->storedPages = 0; + return ReadBufferWithFork(rel, FSM_FORKNUM, blkno); } /* - * Look to see if a page with at least the specified amount of space is - * available in the given FSMRelation. If so, return its page number, - * and advance the nextPage counter so that the next inquiry will return - * a different page if possible; also update the entry to show that the - * requested space is not available anymore. Return InvalidBlockNumber - * if no success. + * Ensure that the FSM fork is at least n_fsmblocks long, extending + * it if necessary with empty pages. And by empty, I mean pages filled + * with zeros, meaning there's no free space. */ -static BlockNumber -find_free_space(FSMRelation *fsmrel, Size spaceNeeded) +static void +fsm_extend(Relation rel, BlockNumber n_fsmblocks) { - FSMPageData *info; - int pagesToCheck, /* outer loop counter */ - pageIndex; /* current page index */ - - if (fsmrel->isIndex) - elog(ERROR, "find_free_space called for an index relation"); - info = (FSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - pageIndex = fsmrel->nextPage; - /* Last operation may have left nextPage pointing past end */ - if (pageIndex >= fsmrel->storedPages) - pageIndex = 0; - - for (pagesToCheck = fsmrel->storedPages; pagesToCheck > 0; pagesToCheck--) - { - FSMPageData *page = info + pageIndex; - Size spaceAvail = FSMPageGetSpace(page); - - /* Check this page */ - if (spaceAvail >= spaceNeeded) - { - /* - * Found what we want --- adjust the entry, and update nextPage. - */ - FSMPageSetSpace(page, spaceAvail - spaceNeeded); - fsmrel->nextPage = pageIndex + 1; - return FSMPageGetPageNum(page); - } - /* Advance pageIndex, wrapping around if needed */ - if (++pageIndex >= fsmrel->storedPages) - pageIndex = 0; - } + BlockNumber n_fsmblocks_now; + Page pg; - return InvalidBlockNumber; /* nothing found */ -} - -/* - * As above, but for index case --- we only deal in whole pages. - */ -static BlockNumber -find_index_free_space(FSMRelation *fsmrel) -{ - IndexFSMPageData *info; - BlockNumber result; + pg = (Page) palloc(BLCKSZ); + PageInit(pg, BLCKSZ, 0); /* - * If isIndex isn't set, it could be that RecordIndexFreeSpace() has never - * yet been called on this relation, and we're still looking at the - * default setting from create_fsm_rel(). If so, just act as though - * there's no space. + * We use the relation extension lock to lock out other backends + * trying to extend the FSM at the same time. It also locks out + * extension of the main fork, unnecessarily, but extending the + * FSM happens seldom enough that it doesn't seem worthwhile to + * have a separate lock tag type for it. + * + * Note that another backend might have extended the relation + * before we get the lock. */ - if (!fsmrel->isIndex) + LockRelationForExtension(rel, ExclusiveLock); + + n_fsmblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM); + while (n_fsmblocks_now < n_fsmblocks) { - if (fsmrel->storedPages == 0) - return InvalidBlockNumber; - elog(ERROR, "find_index_free_space called for a non-index relation"); + smgrextend(rel->rd_smgr, FSM_FORKNUM, n_fsmblocks_now, + (char *) pg, rel->rd_istemp); + n_fsmblocks_now++; } - /* - * For indexes, there's no need for the nextPage state variable; we just - * remove and return the first available page. (We could save cycles here - * by returning the last page, but it seems better to encourage re-use of - * lower-numbered pages.) - */ - if (fsmrel->storedPages <= 0) - return InvalidBlockNumber; /* no pages available */ - info = (IndexFSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - result = IndexFSMPageGetPageNum(info); - fsmrel->storedPages--; - memmove(info, info + 1, fsmrel->storedPages * sizeof(IndexFSMPageData)); - return result; -} - -/* - * fsm_record_free_space - guts of RecordFreeSpace operation (now only - * provided as part of RecordAndGetPageWithFreeSpace). - */ -static void -fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail) -{ - int pageIndex; + UnlockRelationForExtension(rel, ExclusiveLock); - if (fsmrel->isIndex) - elog(ERROR, "fsm_record_free_space called for an index relation"); - if (lookup_fsm_page_entry(fsmrel, page, &pageIndex)) - { - /* Found an existing entry for page; update it */ - FSMPageData *info; + pfree(pg); - info = (FSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - info += pageIndex; - FSMPageSetSpace(info, spaceAvail); - } - else - { - /* - * No existing entry; ignore the call. We used to add the page to the - * FSM --- but in practice, if the page hasn't got enough space to - * satisfy the caller who's kicking it back to us, then it's probably - * uninteresting to everyone else as well. - */ - } + /* update the cache with the up-to-date size */ + rel->rd_fsm_nblocks_cache = n_fsmblocks_now; } /* - * Look for an entry for a specific page (block number) in a FSMRelation. - * Returns TRUE if a matching entry exists, else FALSE. + * Set value in given FSM page and slot. * - * The output argument *outPageIndex is set to indicate where the entry exists - * (if TRUE result) or could be inserted (if FALSE result). + * If minValue > 0, the updated page is also searched for a page with at + * least minValue of free space. If one is found, its slot number is + * returned, -1 otherwise. */ -static bool -lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, - int *outPageIndex) +static int +fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot, + uint8 newValue, uint8 minValue) { - /* Check for empty relation */ - if (fsmrel->storedPages <= 0) - { - *outPageIndex = 0; - return false; - } + Buffer buf; + Page page; + int newslot = -1; - /* Do binary search */ - if (fsmrel->isIndex) - { - IndexFSMPageData *info; - int low, - high; - - info = (IndexFSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - low = 0; - high = fsmrel->storedPages - 1; - while (low <= high) - { - int middle; - BlockNumber probe; + buf = fsm_readbuf(rel, addr, true); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); - middle = low + (high - low) / 2; - probe = IndexFSMPageGetPageNum(info + middle); - if (probe == page) - { - *outPageIndex = middle; - return true; - } - else if (probe < page) - low = middle + 1; - else - high = middle - 1; - } - *outPageIndex = low; - return false; - } - else - { - FSMPageData *info; - int low, - high; - - info = (FSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - low = 0; - high = fsmrel->storedPages - 1; - while (low <= high) - { - int middle; - BlockNumber probe; + page = BufferGetPage(buf); - middle = low + (high - low) / 2; - probe = FSMPageGetPageNum(info + middle); - if (probe == page) - { - *outPageIndex = middle; - return true; - } - else if (probe < page) - low = middle + 1; - else - high = middle - 1; - } - *outPageIndex = low; - return false; + if (fsm_set_avail(page, slot, newValue)) + MarkBufferDirty(buf); + + if (minValue != 0) + { + /* Search while we still hold the lock */ + newslot = fsm_search_avail(buf, minValue, + addr.level == FSM_BOTTOM_LEVEL, + true); } + + UnlockReleaseBuffer(buf); + + return newslot; } /* - * Re-pack the FSM storage arena, dropping data if necessary to meet the - * current allocation target for each relation. At conclusion, all available - * space in the arena will be coalesced at the end. + * Search the tree for a heap page with at least min_cat of free space */ -static void -compact_fsm_storage(void) +static BlockNumber +fsm_search(Relation rel, uint8 min_cat) { - int nextChunkIndex = 0; - bool did_push = false; - FSMRelation *fsmrel; + int restarts = 0; + FSMAddress addr = FSM_ROOT_ADDRESS; - for (fsmrel = FreeSpaceMap->firstRel; - fsmrel != NULL; - fsmrel = fsmrel->nextPhysical) + for (;;) { - int newAlloc; - int newAllocPages; - int newChunkIndex; - int oldChunkIndex; - int curChunks; - char *newLocation; - char *oldLocation; + int slot; + Buffer buf; + uint8 max_avail; /* - * Calculate target allocation, make sure we don't overrun due to - * roundoff error + * Read the FSM page. The root page is created if it doesn't exist + * yet, to save future searchers the effort of having to call + * smgrnblocks() in fsm_readbuf(), only to see that the FSM is + * completely empty. */ - newAlloc = fsm_calc_target_allocation(fsm_calc_request(fsmrel)); - if (newAlloc > FreeSpaceMap->totalChunks - nextChunkIndex) - newAlloc = FreeSpaceMap->totalChunks - nextChunkIndex; - if (fsmrel->isIndex) - newAllocPages = newAlloc * INDEXCHUNKPAGES; - else - newAllocPages = newAlloc * CHUNKPAGES; - - /* - * Determine current size, current and new locations - */ - curChunks = fsm_current_chunks(fsmrel); - oldChunkIndex = fsmrel->firstChunk; - oldLocation = FreeSpaceMap->arena + oldChunkIndex * CHUNKBYTES; - newChunkIndex = nextChunkIndex; - newLocation = FreeSpaceMap->arena + newChunkIndex * CHUNKBYTES; + buf = fsm_readbuf(rel, addr, (addr.level != FSM_ROOT_LEVEL)); - /* - * It's possible that we have to move data down, not up, if the - * allocations of previous rels expanded. This normally means that - * our allocation expanded too (or at least got no worse), and ditto - * for later rels. So there should be room to move all our data down - * without dropping any --- but we might have to push down following - * rels to acquire the room. We don't want to do the push more than - * once, so pack everything against the end of the arena if so. - * - * In corner cases where we are on the short end of a roundoff choice - * that we were formerly on the long end of, it's possible that we - * have to move down and compress our data too. In fact, even after - * pushing down the following rels, there might not be as much space - * as we computed for this rel above --- that would imply that some - * following rel(s) are also on the losing end of roundoff choices. We - * could handle this fairly by doing the per-rel compactions - * out-of-order, but that seems like way too much complexity to deal - * with a very infrequent corner case. Instead, we simply drop pages - * from the end of the current rel's data until it fits. - */ - if (newChunkIndex > oldChunkIndex) + /* Search within the page */ + if (BufferIsValid(buf)) { - int limitChunkIndex; - - if (newAllocPages < fsmrel->storedPages) - { - /* move and compress --- just drop excess pages */ - fsmrel->storedPages = newAllocPages; - curChunks = fsm_current_chunks(fsmrel); - } - /* is there enough space? */ - if (fsmrel->nextPhysical != NULL) - limitChunkIndex = fsmrel->nextPhysical->firstChunk; - else - limitChunkIndex = FreeSpaceMap->totalChunks; - if (newChunkIndex + curChunks > limitChunkIndex) - { - /* not enough space, push down following rels */ - if (!did_push) - { - push_fsm_rels_after(fsmrel); - did_push = true; - } - /* now is there enough space? */ - if (fsmrel->nextPhysical != NULL) - limitChunkIndex = fsmrel->nextPhysical->firstChunk; - else - limitChunkIndex = FreeSpaceMap->totalChunks; - if (newChunkIndex + curChunks > limitChunkIndex) - { - /* uh-oh, forcibly cut the allocation to fit */ - newAlloc = limitChunkIndex - newChunkIndex; - - /* - * If newAlloc < 0 at this point, we are moving the rel's - * firstChunk into territory currently assigned to a later - * rel. This is okay so long as we do not copy any data. - * The rels will be back in nondecreasing firstChunk order - * at completion of the compaction pass. - */ - if (newAlloc < 0) - newAlloc = 0; - if (fsmrel->isIndex) - newAllocPages = newAlloc * INDEXCHUNKPAGES; - else - newAllocPages = newAlloc * CHUNKPAGES; - fsmrel->storedPages = newAllocPages; - curChunks = fsm_current_chunks(fsmrel); - } - } - memmove(newLocation, oldLocation, curChunks * CHUNKBYTES); + LockBuffer(buf, BUFFER_LOCK_SHARE); + slot = fsm_search_avail(buf, min_cat, + (addr.level == FSM_BOTTOM_LEVEL), + false); + if (slot == -1) + max_avail = fsm_get_max_avail(BufferGetPage(buf)); + UnlockReleaseBuffer(buf); + } + else + { + slot = -1; + max_avail = 0; } - else if (newAllocPages < fsmrel->storedPages) + + if (slot != -1) { /* - * Need to compress the page data. For an index, "compression" - * just means dropping excess pages; otherwise we try to keep the - * ones with the most space. + * Descend the tree, or return the found block if we're at the + * bottom. */ - if (fsmrel->isIndex) - { - fsmrel->storedPages = newAllocPages; - /* may need to move data */ - if (newChunkIndex != oldChunkIndex) - memmove(newLocation, oldLocation, newAlloc * CHUNKBYTES); - } - else - { - pack_existing_pages((FSMPageData *) newLocation, - newAllocPages, - (FSMPageData *) oldLocation, - fsmrel->storedPages); - fsmrel->storedPages = newAllocPages; - } + if (addr.level == FSM_BOTTOM_LEVEL) + return fsm_get_heap_blk(addr, slot); + + addr = fsm_get_child(addr, slot); } - else if (newChunkIndex != oldChunkIndex) + else if (addr.level == FSM_ROOT_LEVEL) { /* - * No compression needed, but must copy the data up + * At the root, failure means there's no page with enough free + * space in the FSM. Give up. */ - memmove(newLocation, oldLocation, curChunks * CHUNKBYTES); + return InvalidBlockNumber; } - fsmrel->firstChunk = newChunkIndex; - nextChunkIndex += newAlloc; - } - Assert(nextChunkIndex <= FreeSpaceMap->totalChunks); - FreeSpaceMap->usedChunks = nextChunkIndex; -} - -/* - * Push all FSMRels physically after afterRel to the end of the storage arena. - * - * We sometimes have to do this when deletion or truncation of a relation - * causes the allocations of remaining rels to expand markedly. We must - * temporarily push existing data down to the end so that we can move it - * back up in an orderly fashion. - */ -static void -push_fsm_rels_after(FSMRelation *afterRel) -{ - int nextChunkIndex = FreeSpaceMap->totalChunks; - FSMRelation *fsmrel; - - FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks; + else + { + uint16 parentslot; + FSMAddress parent; - for (fsmrel = FreeSpaceMap->lastRel; - fsmrel != NULL; - fsmrel = fsmrel->priorPhysical) - { - int chunkCount; - int newChunkIndex; - int oldChunkIndex; - char *newLocation; - char *oldLocation; + /* + * At lower level, failure can happen if the value in the upper- + * level node didn't reflect the value on the lower page. Update + * the upper node, to avoid falling into the same trap again, and + * start over. + * + * There's a race condition here, if another backend updates this + * page right after we release it, and gets the lock on the parent + * page before us. We'll then update the parent page with the now + * stale information we had. It's OK, because it should happen + * rarely, and will be fixed by the next vacuum. + */ + parent = fsm_get_parent(addr, &parentslot); + fsm_set_and_search(rel, parent, parentslot, max_avail, 0); - if (fsmrel == afterRel) - break; + /* + * If the upper pages are badly out of date, we might need to + * loop quite a few times, updating them as we go. Any + * inconsistencies should eventually be corrected and the loop + * should end. Looping indefinitely is nevertheless scary, so + * provide an emergency valve. + */ + if (restarts++ > 10000) + return InvalidBlockNumber; - chunkCount = fsm_current_chunks(fsmrel); - nextChunkIndex -= chunkCount; - newChunkIndex = nextChunkIndex; - oldChunkIndex = fsmrel->firstChunk; - if (newChunkIndex < oldChunkIndex) - { - /* we're pushing down, how can it move up? */ - elog(PANIC, "inconsistent entry sizes in FSM"); - } - else if (newChunkIndex > oldChunkIndex) - { - /* need to move it */ - newLocation = FreeSpaceMap->arena + newChunkIndex * CHUNKBYTES; - oldLocation = FreeSpaceMap->arena + oldChunkIndex * CHUNKBYTES; - memmove(newLocation, oldLocation, chunkCount * CHUNKBYTES); - fsmrel->firstChunk = newChunkIndex; + /* Start search all over from the root */ + addr = FSM_ROOT_ADDRESS; } } - Assert(nextChunkIndex >= 0); } + /* - * Pack a set of per-page freespace data into a smaller amount of space. - * - * The method is to compute a low-resolution histogram of the free space - * amounts, then determine which histogram bin contains the break point. - * We then keep all pages above that bin, none below it, and just enough - * of the pages in that bin to fill the output area exactly. + * Recursive guts of FreeSpaceMapVacuum */ -#define HISTOGRAM_BINS 64 - -static void -pack_incoming_pages(FSMPageData *newLocation, int newPages, - FSMPageData *pageSpaces, int nPages) +static uint8 +fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p) { - int histogram[HISTOGRAM_BINS]; - int above, - binct, - i; - Size thresholdL, - thresholdU; - - Assert(newPages < nPages); /* else I shouldn't have been called */ - /* Build histogram */ - MemSet(histogram, 0, sizeof(histogram)); - for (i = 0; i < nPages; i++) - { - Size avail = FSMPageGetSpace(&pageSpaces[i]); - - if (avail >= BLCKSZ) - elog(ERROR, "bogus freespace amount"); - avail /= (BLCKSZ / HISTOGRAM_BINS); - histogram[avail]++; - } - /* Find the breakpoint bin */ - above = 0; - for (i = HISTOGRAM_BINS - 1; i >= 0; i--) - { - int sum = above + histogram[i]; + Buffer buf; + Page page; + uint8 max_avail; - if (sum > newPages) - break; - above = sum; - } - Assert(i >= 0); - thresholdL = i * BLCKSZ / HISTOGRAM_BINS; /* low bound of bp bin */ - thresholdU = (i + 1) * BLCKSZ / HISTOGRAM_BINS; /* hi bound */ - binct = newPages - above; /* number to take from bp bin */ - /* And copy the appropriate data */ - for (i = 0; i < nPages; i++) + /* Read the page if it exists, or return EOF */ + buf = fsm_readbuf(rel, addr, false); + if (!BufferIsValid(buf)) { - BlockNumber page = FSMPageGetPageNum(&pageSpaces[i]); - Size avail = FSMPageGetSpace(&pageSpaces[i]); - - /* Check caller provides sorted data */ - if (i > 0 && page <= FSMPageGetPageNum(&pageSpaces[i - 1])) - elog(ERROR, "free-space data is not in page order"); - /* Save this page? */ - if (avail >= thresholdU || - (avail >= thresholdL && (--binct >= 0))) - { - *newLocation = pageSpaces[i]; - newLocation++; - newPages--; - } + *eof_p = true; + return 0; } - Assert(newPages == 0); -} - -/* - * Pack a set of per-page freespace data into a smaller amount of space. - * - * This is algorithmically identical to pack_incoming_pages(), but accepts - * a different input representation. Also, we assume the input data has - * previously been checked for validity (size in bounds, pages in order). - * - * Note: it is possible for the source and destination arrays to overlap. - * The caller is responsible for making sure newLocation is at lower addresses - * so that we can copy data moving forward in the arrays without problem. - */ -static void -pack_existing_pages(FSMPageData *newLocation, int newPages, - FSMPageData *oldLocation, int oldPages) -{ - int histogram[HISTOGRAM_BINS]; - int above, - binct, - i; - Size thresholdL, - thresholdU; - - Assert(newPages < oldPages); /* else I shouldn't have been called */ - /* Build histogram */ - MemSet(histogram, 0, sizeof(histogram)); - for (i = 0; i < oldPages; i++) - { - Size avail = FSMPageGetSpace(oldLocation + i); + else + *eof_p = false; - /* Shouldn't happen, but test to protect against stack clobber */ - if (avail >= BLCKSZ) - elog(ERROR, "bogus freespace amount"); - avail /= (BLCKSZ / HISTOGRAM_BINS); - histogram[avail]++; - } - /* Find the breakpoint bin */ - above = 0; - for (i = HISTOGRAM_BINS - 1; i >= 0; i--) - { - int sum = above + histogram[i]; + page = BufferGetPage(buf); - if (sum > newPages) - break; - above = sum; - } - Assert(i >= 0); - thresholdL = i * BLCKSZ / HISTOGRAM_BINS; /* low bound of bp bin */ - thresholdU = (i + 1) * BLCKSZ / HISTOGRAM_BINS; /* hi bound */ - binct = newPages - above; /* number to take from bp bin */ - /* And copy the appropriate data */ - for (i = 0; i < oldPages; i++) + /* + * Recurse into children, and fix the information stored about them + * at this level. + */ + if (addr.level > FSM_BOTTOM_LEVEL) { - BlockNumber page = FSMPageGetPageNum(oldLocation + i); - Size avail = FSMPageGetSpace(oldLocation + i); + int slot; + bool eof = false; - /* Save this page? */ - if (avail >= thresholdU || - (avail >= thresholdL && (--binct >= 0))) + for (slot = 0; slot < SlotsPerFSMPage; slot++) { - FSMPageSetPageNum(newLocation, page); - FSMPageSetSpace(newLocation, avail); - newLocation++; - newPages--; - } - } - Assert(newPages == 0); -} + int child_avail; -/* - * Calculate number of chunks "requested" by a rel. The "request" is - * anything beyond the rel's one guaranteed chunk. - * - * Rel's interestingPages and isIndex settings must be up-to-date when called. - * - * See notes at top of file for details. - */ -static int -fsm_calc_request(FSMRelation *fsmrel) -{ - int req; + /* After we hit end-of-file, just clear the rest of the slots */ + if (!eof) + child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof); + else + child_avail = 0; - /* Convert page count to chunk count */ - if (fsmrel->isIndex) - { - /* test to avoid unsigned underflow at zero */ - if (fsmrel->interestingPages <= INDEXCHUNKPAGES) - return 0; - /* quotient will fit in int, even if interestingPages doesn't */ - req = (fsmrel->interestingPages - 1) / INDEXCHUNKPAGES; - } - else - { - if (fsmrel->interestingPages <= CHUNKPAGES) - return 0; - req = (fsmrel->interestingPages - 1) / CHUNKPAGES; + /* Update information about the child */ + if (fsm_get_avail(page, slot) != child_avail) + { + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + fsm_set_avail(BufferGetPage(buf), slot, child_avail); + MarkBufferDirty(buf); + LockBuffer(buf, BUFFER_LOCK_UNLOCK); + } + } } + max_avail = fsm_get_max_avail(BufferGetPage(buf)); + /* - * We clamp the per-relation requests to at most half the arena size; this - * is intended to prevent a single bloated relation from crowding out FSM - * service for every other rel. + * Reset the next slot pointer. This encourages the use of low-numbered + * pages, increasing the chances that a later vacuum can truncate the + * relation. */ - req = Min(req, FreeSpaceMap->totalChunks / 2); - - return req; -} + ((FSMPage) PageGetContents(page))->fp_next_slot = 0; -/* - * Same as above, but without the clamp ... this is just intended for - * reporting the total space needed to store all information. - */ -static int -fsm_calc_request_unclamped(FSMRelation *fsmrel) -{ - int req; - - /* Convert page count to chunk count */ - if (fsmrel->isIndex) - { - /* test to avoid unsigned underflow at zero */ - if (fsmrel->interestingPages <= INDEXCHUNKPAGES) - return 0; - /* quotient will fit in int, even if interestingPages doesn't */ - req = (fsmrel->interestingPages - 1) / INDEXCHUNKPAGES; - } - else - { - if (fsmrel->interestingPages <= CHUNKPAGES) - return 0; - req = (fsmrel->interestingPages - 1) / CHUNKPAGES; - } + ReleaseBuffer(buf); - return req; + return max_avail; } -/* - * Calculate target allocation (number of chunks) for a rel - * - * Parameter is the result from fsm_calc_request(). The global sumRequests - * and numRels totals must be up-to-date already. - * - * See notes at top of file for details. - */ -static int -fsm_calc_target_allocation(int myRequest) -{ - double spareChunks; - int extra; - spareChunks = FreeSpaceMap->totalChunks - FreeSpaceMap->numRels; - Assert(spareChunks > 0); - if (spareChunks >= FreeSpaceMap->sumRequests) - { - /* We aren't oversubscribed, so allocate exactly the request */ - extra = myRequest; - } - else - { - extra = (int) rint(spareChunks * myRequest / FreeSpaceMap->sumRequests); - if (extra < 0) /* shouldn't happen, but make sure */ - extra = 0; - } - return 1 + extra; -} +/****** WAL-logging ******/ -/* - * Calculate number of chunks actually used to store current data - */ -static int -fsm_current_chunks(FSMRelation *fsmrel) +void +fsm_redo(XLogRecPtr lsn, XLogRecord *record) { - int chunkCount; - - /* Make sure storedPages==0 produces right answer */ - if (fsmrel->storedPages <= 0) - return 0; - /* Convert page count to chunk count */ - if (fsmrel->isIndex) - chunkCount = (fsmrel->storedPages - 1) / INDEXCHUNKPAGES + 1; - else - chunkCount = (fsmrel->storedPages - 1) / CHUNKPAGES + 1; - return chunkCount; -} + uint8 info = record->xl_info & ~XLR_INFO_MASK; -/* - * Calculate current actual allocation (number of chunks) for a rel - */ -static int -fsm_current_allocation(FSMRelation *fsmrel) -{ - if (fsmrel->nextPhysical != NULL) - return fsmrel->nextPhysical->firstChunk - fsmrel->firstChunk; - else if (fsmrel == FreeSpaceMap->lastRel) - return FreeSpaceMap->usedChunks - fsmrel->firstChunk; - else + switch (info) { - /* it's not in the storage-order list */ - Assert(fsmrel->firstChunk < 0 && fsmrel->storedPages == 0); - return 0; - } -} - - -/* - * Return the FreeSpaceMap structure for examination. - */ -FSMHeader * -GetFreeSpaceMap(void) -{ + case XLOG_FSM_TRUNCATE: + { + xl_fsm_truncate *xlrec; + Relation rel; - return FreeSpaceMap; + xlrec = (xl_fsm_truncate *) XLogRecGetData(record); + rel = CreateFakeRelcacheEntry(xlrec->node); + FreeSpaceMapTruncateRel(rel, xlrec->nheapblocks); + FreeFakeRelcacheEntry(rel); + } + break; + default: + elog(PANIC, "fsm_redo: unknown op code %u", info); + } } - -#ifdef FREESPACE_DEBUG -/* - * Dump contents of freespace map for debugging. - * - * We assume caller holds the FreeSpaceLock, or is otherwise unconcerned - * about other processes. - */ void -DumpFreeSpace(void) +fsm_desc(StringInfo buf, uint8 xl_info, char *rec) { - FSMRelation *fsmrel; - FSMRelation *prevrel = NULL; - int relNum = 0; - int nPages; + uint8 info = xl_info & ~XLR_INFO_MASK; - for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = fsmrel->nextUsage) + switch (info) { - relNum++; - fprintf(stderr, "Map %d: rel %u/%u/%u isIndex %d avgRequest %u interestingPages %u nextPage %d\nMap= ", - relNum, - fsmrel->key.spcNode, fsmrel->key.dbNode, fsmrel->key.relNode, - (int) fsmrel->isIndex, fsmrel->avgRequest, - fsmrel->interestingPages, fsmrel->nextPage); - if (fsmrel->isIndex) - { - IndexFSMPageData *page; - - page = (IndexFSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - for (nPages = 0; nPages < fsmrel->storedPages; nPages++) - { - fprintf(stderr, " %u", - IndexFSMPageGetPageNum(page)); - page++; - } - } - else + case XLOG_FSM_TRUNCATE: { - FSMPageData *page; + xl_fsm_truncate *xlrec = (xl_fsm_truncate *) rec; - page = (FSMPageData *) - (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); - for (nPages = 0; nPages < fsmrel->storedPages; nPages++) - { - fprintf(stderr, " %u:%u", - FSMPageGetPageNum(page), - FSMPageGetSpace(page)); - page++; - } + appendStringInfo(buf, "truncate: rel %u/%u/%u; nheapblocks %u;", + xlrec->node.spcNode, xlrec->node.dbNode, + xlrec->node.relNode, xlrec->nheapblocks); + break; } - fprintf(stderr, "\n"); - /* Cross-check list links */ - if (prevrel != fsmrel->priorUsage) - fprintf(stderr, "DumpFreeSpace: broken list links\n"); - prevrel = fsmrel; + default: + appendStringInfo(buf, "UNKNOWN"); + break; } - if (prevrel != FreeSpaceMap->usageListTail) - fprintf(stderr, "DumpFreeSpace: broken list links\n"); - /* Cross-check global counters */ - if (relNum != FreeSpaceMap->numRels) - fprintf(stderr, "DumpFreeSpace: %d rels in list, but numRels = %d\n", - relNum, FreeSpaceMap->numRels); } - -#endif /* FREESPACE_DEBUG */ |