Diffstat (limited to 'src/backend/storage/freespace/freespace.c')
 src/backend/storage/freespace/freespace.c | 285 ++++++++++++++++++++++++++++-
 1 file changed, 279 insertions(+), 6 deletions(-)
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index eee82860575..d3f207b8540 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -76,6 +76,14 @@
#define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
#define FSM_BOTTOM_LEVEL 0
+/* Status codes for the local map. */
+
+/* Either already tried, or beyond the end of the relation */
+#define FSM_LOCAL_NOT_AVAIL 0x00
+
+/* Available to try */
+#define FSM_LOCAL_AVAIL 0x01
+
/*
* The internal FSM routines work on a logical addressing scheme. Each
* level of the tree can be thought of as a separately addressable file.
@@ -89,6 +97,23 @@ typedef struct
/* Address of the root page. */
static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
+/* Local map of block numbers for small heaps with no FSM. */
+typedef struct
+{
+ BlockNumber nblocks;
+ uint8 map[HEAP_FSM_CREATION_THRESHOLD];
+} FSMLocalMap;
+
+static FSMLocalMap fsm_local_map =
+{
+ 0,
+ {
+ FSM_LOCAL_NOT_AVAIL
+ }
+};
+
+#define FSM_LOCAL_MAP_EXISTS (fsm_local_map.nblocks > 0)
+
/* functions to navigate the tree */
static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
@@ -107,10 +132,14 @@ static Size fsm_space_cat_to_avail(uint8 cat);
/* workhorse functions for various operations */
static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
uint8 newValue, uint8 minValue);
+static void fsm_local_set(Relation rel, BlockNumber cur_nblocks);
static BlockNumber fsm_search(Relation rel, uint8 min_cat);
+static BlockNumber fsm_local_search(void);
static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
BlockNumber start, BlockNumber end,
bool *eof);
+static bool fsm_allow_writes(Relation rel, BlockNumber heapblk,
+ BlockNumber nblocks, BlockNumber *get_nblocks);
/******** Public API ********/
@@ -127,13 +156,46 @@ static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
* amount of free space available on that page and then try again (see
* RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
* extend the relation.
+ *
+ * For very small heap relations that don't have a FSM, we try every other
+ * page before extending the relation. To keep track of which pages have
+ * been tried, initialize a local in-memory map of pages.
*/
BlockNumber
-GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
+GetPageWithFreeSpace(Relation rel, Size spaceNeeded, bool check_fsm_only)
{
uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded);
+ BlockNumber target_block,
+ nblocks;
+
+ /* First try the FSM, if it exists. */
+ target_block = fsm_search(rel, min_cat);
+
+ if (target_block == InvalidBlockNumber &&
+ (rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_TOASTVALUE) &&
+ !check_fsm_only)
+ {
+ nblocks = RelationGetNumberOfBlocks(rel);
+
+ if (nblocks > HEAP_FSM_CREATION_THRESHOLD)
+ {
+ /*
+ * If the FSM knows nothing of the rel, try the last page before
+ * we give up and extend. This avoids one-tuple-per-page syndrome
+ * during bootstrapping or in a recently-started system.
+ */
+ target_block = nblocks - 1;
+ }
+ else if (nblocks > 0)
+ {
+ /* Create or update local map and get first candidate block. */
+ fsm_local_set(rel, nblocks);
+ target_block = fsm_local_search();
+ }
+ }
- return fsm_search(rel, min_cat);
+ return target_block;
}
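(Illustration, not part of the patch: a minimal standalone sketch of the fallback decision above, assuming a creation threshold of 4 pages and using made-up demo_* names. The real function takes its first local-map candidate from fsm_local_search(); here that is approximated by the last block.)

/* Standalone model of the GetPageWithFreeSpace() fallback; demo only. */
#include <stdio.h>
#include <stdbool.h>

#define DEMO_FSM_THRESHOLD  4               /* assumed HEAP_FSM_CREATION_THRESHOLD */
#define DEMO_INVALID_BLOCK  ((unsigned) -1) /* stands in for InvalidBlockNumber */

static unsigned
demo_choose_block(unsigned fsm_result, bool is_heap_or_toast,
                  bool check_fsm_only, unsigned nblocks)
{
    if (fsm_result != DEMO_INVALID_BLOCK)
        return fsm_result;              /* the FSM found a page */
    if (!is_heap_or_toast || check_fsm_only)
        return DEMO_INVALID_BLOCK;      /* caller will extend */
    if (nblocks > DEMO_FSM_THRESHOLD)
        return nblocks - 1;             /* FSM knows nothing: try the last page */
    if (nblocks > 0)
        return nblocks - 1;             /* small heap: local map starts at the last page */
    return DEMO_INVALID_BLOCK;          /* empty relation: extend */
}

int
main(void)
{
    printf("10-page heap, empty FSM -> block %u\n",
           demo_choose_block(DEMO_INVALID_BLOCK, true, false, 10));
    printf("3-page heap, no FSM     -> block %u\n",
           demo_choose_block(DEMO_INVALID_BLOCK, true, false, 3));
    printf("empty relation          -> block %u (extend)\n",
           demo_choose_block(DEMO_INVALID_BLOCK, true, false, 0));
    return 0;
}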
/*
@@ -144,16 +206,47 @@ GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
* also some effort to return a page close to the old page; if there's a
* page with enough free space on the same FSM page where the old one page
* is located, it is preferred.
+ *
+ * For very small heap relations that don't have a FSM, we update the local
+ * map to indicate we have tried a page, and return the next page to try.
*/
BlockNumber
RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
Size oldSpaceAvail, Size spaceNeeded)
{
- int old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
- int search_cat = fsm_space_needed_to_cat(spaceNeeded);
+ int old_cat;
+ int search_cat;
FSMAddress addr;
uint16 slot;
int search_slot;
+ BlockNumber nblocks = InvalidBlockNumber;
+
+ /* First try the local map, if it exists. */
+ if (FSM_LOCAL_MAP_EXISTS)
+ {
+ Assert((rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_TOASTVALUE) &&
+ fsm_local_map.map[oldPage] == FSM_LOCAL_AVAIL);
+
+ fsm_local_map.map[oldPage] = FSM_LOCAL_NOT_AVAIL;
+ return fsm_local_search();
+ }
+
+ if (!fsm_allow_writes(rel, oldPage, InvalidBlockNumber, &nblocks))
+ {
+ /*
+ * If we have neither a local map nor a FSM, we probably just tried
+ * the target block in the smgr relation entry and failed, so we'll
+ * need to create the local map.
+ */
+ fsm_local_set(rel, nblocks);
+ return fsm_local_search();
+ }
+
+ /* Normal FSM logic follows */
+
+ old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
+ search_cat = fsm_space_needed_to_cat(spaceNeeded);
/* Get the location of the FSM byte representing the heap block */
addr = fsm_get_location(oldPage, &slot);
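(Illustration, not part of the patch: a toy model of the local-map branch above. The 4-entry map contents and the helper name are assumptions; in the patch the map is built by fsm_local_set() and searched by fsm_local_search().)

#include <stdio.h>

#define AVAIL      0x01
#define NOT_AVAIL  0x00

/* Toy 4-page map as fsm_local_set() would leave it: blocks 3 and 1 marked. */
static unsigned char map[4] = {NOT_AVAIL, AVAIL, NOT_AVAIL, AVAIL};

/* Report that block 'tried' had no room, then return the next candidate (or -1). */
static int
demo_record_and_get(int tried)
{
    map[tried] = NOT_AVAIL;
    for (int blk = 3; blk >= 0; blk--)
        if (map[blk] == AVAIL)
            return blk;
    return -1;                      /* map exhausted: caller extends */
}

int
main(void)
{
    printf("after block 3 fails -> try %d\n", demo_record_and_get(3)); /* 1 */
    printf("after block 1 fails -> try %d\n", demo_record_and_get(1)); /* -1 */
    return 0;
}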
@@ -176,21 +269,45 @@ RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
* Note that if the new spaceAvail value is higher than the old value stored
* in the FSM, the space might not become visible to searchers until the next
* FreeSpaceMapVacuum call, which updates the upper level pages.
+ *
+ * Callers have no need for a local map.
*/
void
-RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
+RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
+ Size spaceAvail, BlockNumber nblocks)
{
- int new_cat = fsm_space_avail_to_cat(spaceAvail);
+ int new_cat;
FSMAddress addr;
uint16 slot;
+ BlockNumber dummy;
+
+ if (!fsm_allow_writes(rel, heapBlk, nblocks, &dummy))
+ /* No FSM to update and no local map either */
+ return;
/* Get the location of the FSM byte representing the heap block */
addr = fsm_get_location(heapBlk, &slot);
+ new_cat = fsm_space_avail_to_cat(spaceAvail);
fsm_set_and_search(rel, addr, slot, new_cat, 0);
}
/*
+ * Clear the local map. We must call this when we have found a block with
+ * enough free space, when we extend the relation, or on transaction abort.
+ */
+void
+FSMClearLocalMap(void)
+{
+ if (FSM_LOCAL_MAP_EXISTS)
+ {
+ fsm_local_map.nblocks = 0;
+ memset(&fsm_local_map.map, FSM_LOCAL_NOT_AVAIL,
+ sizeof(fsm_local_map.map));
+ }
+}
+
+/*
* XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
* WAL replay
*/
@@ -204,6 +321,31 @@ XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
BlockNumber blkno;
Buffer buf;
Page page;
+ bool write_to_fsm;
+
+ /* This is meant to mirror the logic in fsm_allow_writes() */
+ if (heapBlk >= HEAP_FSM_CREATION_THRESHOLD)
+ write_to_fsm = true;
+ else
+ {
+ /* Open the relation at smgr level */
+ SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
+
+ if (smgrexists(smgr, FSM_FORKNUM))
+ write_to_fsm = true;
+ else
+ {
+ BlockNumber heap_nblocks = smgrnblocks(smgr, MAIN_FORKNUM);
+
+ if (heap_nblocks > HEAP_FSM_CREATION_THRESHOLD)
+ write_to_fsm = true;
+ else
+ write_to_fsm = false;
+ }
+ }
+
+ if (!write_to_fsm)
+ return;
/* Get the location of the FSM byte representing the heap block */
addr = fsm_get_location(heapBlk, &slot);
@@ -904,3 +1046,134 @@ fsm_vacuum_page(Relation rel, FSMAddress addr,
return max_avail;
}
+
+/*
+ * For heaps, we prevent creation of the FSM unless the number of pages
+ * exceeds HEAP_FSM_CREATION_THRESHOLD. For tables that don't already have
+ * a FSM, this will save an inode and a few kB of space.
+ *
+ * XXX The API is a little awkward -- if the caller passes a valid nblocks
+ * value, it can avoid invoking a system call. If the caller passes
+ * InvalidBlockNumber and receives a false return value, it can get an
+ * up-to-date relation size from get_nblocks. This saves a few cycles in
+ * the caller, which would otherwise need to get the relation size by itself.
+ */
+static bool
+fsm_allow_writes(Relation rel, BlockNumber heapblk,
+ BlockNumber nblocks, BlockNumber *get_nblocks)
+{
+ bool skip_get_nblocks;
+
+ if (heapblk >= HEAP_FSM_CREATION_THRESHOLD)
+ return true;
+
+ /* Non-heap rels can always create a FSM. */
+ if (rel->rd_rel->relkind != RELKIND_RELATION &&
+ rel->rd_rel->relkind != RELKIND_TOASTVALUE)
+ return true;
+
+ /*
+ * If the caller knows nblocks, we can avoid a system call later. If it
+ * doesn't, maybe we have relpages from a previous VACUUM. Since the table
+ * may have extended since then, we still have to count the pages later if
+ * we can't return now.
+ */
+ if (nblocks != InvalidBlockNumber)
+ {
+ if (nblocks > HEAP_FSM_CREATION_THRESHOLD)
+ return true;
+ else
+ skip_get_nblocks = true;
+ }
+ else
+ {
+ if (rel->rd_rel->relpages != InvalidBlockNumber &&
+ rel->rd_rel->relpages > HEAP_FSM_CREATION_THRESHOLD)
+ return true;
+ else
+ skip_get_nblocks = false;
+ }
+
+ RelationOpenSmgr(rel);
+ if (smgrexists(rel->rd_smgr, FSM_FORKNUM))
+ return true;
+
+ if (skip_get_nblocks)
+ return false;
+
+ /* last resort */
+ *get_nblocks = RelationGetNumberOfBlocks(rel);
+ if (*get_nblocks > HEAP_FSM_CREATION_THRESHOLD)
+ return true;
+ else
+ return false;
+}
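(Illustration, not part of the patch: the decision tree of fsm_allow_writes() collapsed into a standalone predicate. Parameter names and the threshold value of 4 are assumptions, and the real function also returns an up-to-date block count through *get_nblocks.)

#include <stdio.h>
#include <stdbool.h>

#define DEMO_FSM_THRESHOLD  4               /* assumed HEAP_FSM_CREATION_THRESHOLD */
#define DEMO_INVALID_BLOCK  ((unsigned) -1) /* "size not known" */

/* May this heap block be recorded in a FSM (creating the fork if needed)? */
static bool
demo_allow_fsm_writes(unsigned heapblk, bool is_heap_or_toast,
                      unsigned nblocks,          /* caller-supplied, or invalid */
                      unsigned relpages,         /* stale estimate, or invalid */
                      bool fsm_fork_exists,
                      unsigned actual_nblocks)   /* what a fresh count would say */
{
    if (heapblk >= DEMO_FSM_THRESHOLD)
        return true;                    /* block number alone proves the heap is big */
    if (!is_heap_or_toast)
        return true;                    /* non-heap relations always get a FSM */
    if (nblocks != DEMO_INVALID_BLOCK)
    {
        if (nblocks > DEMO_FSM_THRESHOLD)
            return true;                /* caller-supplied size is big enough */
    }
    else if (relpages != DEMO_INVALID_BLOCK && relpages > DEMO_FSM_THRESHOLD)
        return true;                    /* relpages from a previous VACUUM is big enough */
    if (fsm_fork_exists)
        return true;                    /* a FSM already exists, keep maintaining it */
    if (nblocks != DEMO_INVALID_BLOCK)
        return false;                   /* known-small heap, no FSM: use the local map */
    return actual_nblocks > DEMO_FSM_THRESHOLD; /* last resort: count the pages */
}

int
main(void)
{
    /* 3-page heap, size supplied by the caller, no FSM fork: skip the FSM. */
    printf("%d\n", demo_allow_fsm_writes(2, true, 3, DEMO_INVALID_BLOCK, false, 3));
    /* Same heap, but a FSM fork already exists: keep writing to it. */
    printf("%d\n", demo_allow_fsm_writes(2, true, 3, DEMO_INVALID_BLOCK, true, 3));
    return 0;
}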
+
+/*
+ * Initialize or update the local map of blocks to try, for when there is
+ * no FSM.
+ *
+ * When we initialize the map, the whole heap is potentially available to
+ * try. Testing revealed that trying every block can cause a small
+ * performance dip compared to when we use a FSM, so we try every other
+ * block instead.
+ */
+static void
+fsm_local_set(Relation rel, BlockNumber cur_nblocks)
+{
+ BlockNumber blkno,
+ cached_target_block;
+
+ /* The local map must not be set already. */
+ Assert(!FSM_LOCAL_MAP_EXISTS);
+
+ /*
+ * Starting at the current last block in the relation and working
+ * backwards, mark alternating blocks as available.
+ */
+ blkno = cur_nblocks - 1;
+ while (true)
+ {
+ fsm_local_map.map[blkno] = FSM_LOCAL_AVAIL;
+ if (blkno >= 2)
+ blkno -= 2;
+ else
+ break;
+ }
+
+ /* Cache the number of blocks. */
+ fsm_local_map.nblocks = cur_nblocks;
+
+ /* Set the status of the cached target block to 'unavailable'. */
+ cached_target_block = RelationGetTargetBlock(rel);
+ if (cached_target_block != InvalidBlockNumber &&
+ cached_target_block < cur_nblocks)
+ fsm_local_map.map[cached_target_block] = FSM_LOCAL_NOT_AVAIL;
+}
+
+/*
+ * Search the local map for an available block to try, in descending order.
+ * As such, there is no heuristic available to decide which order will be
+ * better to try, but the probability of having space in the last block in the
+ * map is higher because that is the most recent block added to the heap.
+ *
+ * This function is used when there is no FSM.
+ */
+static BlockNumber
+fsm_local_search(void)
+{
+ BlockNumber target_block;
+
+ /* Local map must be set by now. */
+ Assert(FSM_LOCAL_MAP_EXISTS);
+
+ target_block = fsm_local_map.nblocks;
+ do
+ {
+ target_block--;
+ if (fsm_local_map.map[target_block] == FSM_LOCAL_AVAIL)
+ return target_block;
+ } while (target_block > 0);
+
+ return InvalidBlockNumber;
+}
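(Illustration, not part of the patch: a self-contained walk-through of fsm_local_set() plus fsm_local_search() on a toy 4-page heap. The threshold of 4 and the cached target block are assumptions chosen for the example; only every other block is marked, per the comment on fsm_local_set() above.)

#include <stdio.h>

#define DEMO_THRESHOLD   4
#define LOCAL_AVAIL      0x01
#define LOCAL_NOT_AVAIL  0x00
#define INVALID_BLOCK    ((unsigned) -1)

static unsigned char map[DEMO_THRESHOLD];
static unsigned      map_nblocks;

/* Mirror of fsm_local_set(): mark every other block, newest first. */
static void
demo_local_set(unsigned cur_nblocks, unsigned cached_target)
{
    unsigned blkno = cur_nblocks - 1;

    for (;;)
    {
        map[blkno] = LOCAL_AVAIL;
        if (blkno >= 2)
            blkno -= 2;
        else
            break;
    }
    map_nblocks = cur_nblocks;

    /* The cached target block was presumably just tried, so skip it. */
    if (cached_target != INVALID_BLOCK && cached_target < cur_nblocks)
        map[cached_target] = LOCAL_NOT_AVAIL;
}

/* Mirror of fsm_local_search(): highest marked block wins. */
static unsigned
demo_local_search(void)
{
    unsigned blkno = map_nblocks;

    do
    {
        blkno--;
        if (map[blkno] == LOCAL_AVAIL)
            return blkno;
    } while (blkno > 0);
    return INVALID_BLOCK;
}

int
main(void)
{
    unsigned blk;

    /* 4-page heap whose cached target block (3) was already tried. */
    demo_local_set(4, 3);

    while ((blk = demo_local_search()) != INVALID_BLOCK)
    {
        printf("try block %u\n", blk);   /* prints: try block 1 */
        map[blk] = LOCAL_NOT_AVAIL;      /* pretend it had no room */
    }
    printf("map exhausted, extend the relation\n");
    return 0;
}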