summaryrefslogtreecommitdiff
path: root/fs/btrfs/raid56.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2022-10-06 17:36:48 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2022-10-06 17:36:48 -0700
commit 76e45035348c247a70ed50eb29a9906657e4444f (patch)
tree e4101b34b1a3ddfea00be656586c22f704b33a2d /fs/btrfs/raid56.c
parent 4c0ed7d8d6e3dc013c4599a837de84794baa5b62 (diff)
parent cbddcc4fa3443fe8cfb2ff8e210deb1f6a0eea38 (diff)
Merge tag 'for-6.1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "There's a bunch of performance improvements, most notably the FIEMAP speedup, the new block group tree to speed up mount on large filesystems, more io_uring integration, some sysfs exports and the usual fixes and core updates. Summary: Performance: - outstanding FIEMAP speed improvement - algorithmic change how extents are enumerated leads to orders of magnitude speed boost (uncached and cached) - extent sharing check speedup (2.2x uncached, 3x cached) - add more cancellation points, allowing to interrupt seeking in files with large number of extents - more efficient hole and data seeking (4x uncached, 1.3x cached) - sample results: 256M, 32K extents: 4s -> 29ms (~150x) 512M, 64K extents: 30s -> 59ms (~550x) 1G, 128K extents: 225s -> 120ms (~1800x) - improved inode logging, especially for directories (on dbench workload throughput +25%, max latency -21%) - improved buffered IO, remove redundant extent state tracking, lowering memory consumption and avoiding rb tree traversal - add sysfs tunable to let qgroup temporarily skip exact accounting when deleting snapshot, leading to a speedup but requiring a rescan after that, will be used by snapper - support io_uring and buffered writes, until now it was just for direct IO, with the no-wait semantics implemented in the buffered write path it now works and leads to speed improvement in IOPS (2x), throughput (2.2x), latency (depends, 2x to 150x) - small performance improvements when dropping and searching for extent maps as well as when flushing delalloc in COW mode (throughput +5MB/s) User visible changes: - new incompatible feature block-group-tree adding a dedicated tree for tracking block groups, this allows a much faster load during mount and avoids seeking unlike when it's scattered in the extent tree items - this reduces mount time for many-terabyte sized filesystems - conversion tool will be provided so existing filesystem can also be updated in place - to reduce test matrix 
and feature combinations requires no-holes and free-space-tree (mkfs defaults since 5.15) - improved reporting of super block corruption detected by scrub - scrub also tries to repair super block and does not wait until next commit - discard stats and tunables are exported in sysfs (/sys/fs/btrfs/FSID/discard) - qgroup status is exported in sysfs (/sys/fs/btrfs/FSID/qgroups/) - verify that super block was not modified when thawing filesystem Fixes: - FIEMAP fixes - fix extent sharing status, does not depend on the cached status where merged - flush delalloc so compressed extents are reported correctly - fix alignment of VMA for memory mapped files on THP - send: fix failures when processing inodes with no links (orphan files and directories) - fix race between quota enable and quota rescan ioctl - handle more corner cases for read-only compat feature verification - fix missed extent on fsync after dropping extent maps Core: - lockdep annotations to validate various transaction states and state transitions - preliminary support for fs-verity in send - more effective memory use in scrub for subpage where sector is smaller than page - block group caching progress logic has been removed, load is now synchronous - simplify end IO callbacks and bio handling, use chained bios instead of own tracking - add no-wait semantics to several functions (tree search, nocow, flushing, buffered write) - cleanups and refactoring MM changes: - export balance_dirty_pages_ratelimited_flags" * tag 'for-6.1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (177 commits) btrfs: set generation before calling btrfs_clean_tree_block in btrfs_init_new_buffer btrfs: drop extent map range more efficiently btrfs: avoid pointless extent map tree search when flushing delalloc btrfs: remove unnecessary next extent map search btrfs: remove unnecessary NULL pointer checks when searching extent maps btrfs: assert tree is locked when clearing extent map from logging btrfs: remove 
unnecessary extent map initializations btrfs: remove the refcount warning/check at free_extent_map() btrfs: add helper to replace extent map range with a new extent map btrfs: move open coded extent map tree deletion out of inode eviction btrfs: use cond_resched_rwlock_write() during inode eviction btrfs: use extent_map_end() at btrfs_drop_extent_map_range() btrfs: move btrfs_drop_extent_cache() to extent_map.c btrfs: fix missed extent on fsync after dropping extent maps btrfs: remove stale prototype of btrfs_write_inode btrfs: enable nowait async buffered writes btrfs: assert nowait mode is not used for some btree search functions btrfs: make btrfs_buffered_write nowait compatible btrfs: plumb NOWAIT through the write path btrfs: make lock_and_cleanup_extent_if_need nowait compatible ...
Diffstat (limited to 'fs/btrfs/raid56.c')
-rw-r--r--fs/btrfs/raid56.c45
1 files changed, 7 insertions, 38 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 2feb5c20641a..f6395e8288d6 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -275,7 +275,6 @@ static void merge_rbio(struct btrfs_raid_bio *dest,
/* Also inherit the bitmaps from @victim. */
bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap,
dest->stripe_nsectors);
- dest->generic_bio_cnt += victim->generic_bio_cnt;
bio_list_init(&victim->bio_list);
}
@@ -814,8 +813,6 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *extra;
- if (rbio->generic_bio_cnt)
- btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);
/*
* Clear the data bitmap, as the rbio may be cached for later usage.
* do this before before unlock_stripe() so there will be no new bio
@@ -946,6 +943,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
spin_lock_init(&rbio->bio_list_lock);
INIT_LIST_HEAD(&rbio->stripe_cache);
INIT_LIST_HEAD(&rbio->hash_list);
+ btrfs_get_bioc(bioc);
rbio->bioc = bioc;
rbio->nr_pages = num_pages;
rbio->nr_sectors = num_sectors;
@@ -1813,15 +1811,12 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
rbio = alloc_rbio(fs_info, bioc);
if (IS_ERR(rbio)) {
- btrfs_put_bioc(bioc);
ret = PTR_ERR(rbio);
- goto out_dec_counter;
+ goto fail;
}
rbio->operation = BTRFS_RBIO_WRITE;
rbio_add_bio(rbio, bio);
- rbio->generic_bio_cnt = 1;
-
/*
* don't plug on full rbios, just get them out the door
* as quickly as we can
@@ -1829,7 +1824,7 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
if (rbio_is_full(rbio)) {
ret = full_stripe_write(rbio);
if (ret)
- goto out_dec_counter;
+ goto fail;
return;
}
@@ -1844,13 +1839,12 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
} else {
ret = __raid56_parity_write(rbio);
if (ret)
- goto out_dec_counter;
+ goto fail;
}
return;
-out_dec_counter:
- btrfs_bio_counter_dec(fs_info);
+fail:
bio->bi_status = errno_to_blk_status(ret);
bio_endio(bio);
}
@@ -2198,18 +2192,11 @@ cleanup:
* of the drive.
*/
void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
- int mirror_num, bool generic_io)
+ int mirror_num)
{
struct btrfs_fs_info *fs_info = bioc->fs_info;
struct btrfs_raid_bio *rbio;
- if (generic_io) {
- ASSERT(bioc->mirror_num == mirror_num);
- btrfs_bio(bio)->mirror_num = mirror_num;
- } else {
- btrfs_get_bioc(bioc);
- }
-
rbio = alloc_rbio(fs_info, bioc);
if (IS_ERR(rbio)) {
bio->bi_status = errno_to_blk_status(PTR_ERR(rbio));
@@ -2225,14 +2212,11 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
__func__, bio->bi_iter.bi_sector << 9,
(u64)bio->bi_iter.bi_size, bioc->map_type);
- kfree(rbio);
+ __free_raid_bio(rbio);
bio->bi_status = BLK_STS_IOERR;
goto out_end_bio;
}
- if (generic_io)
- rbio->generic_bio_cnt = 1;
-
/*
* Loop retry:
* for 'mirror == 2', reconstruct from all other stripes.
@@ -2261,8 +2245,6 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
return;
out_end_bio:
- btrfs_bio_counter_dec(fs_info);
- btrfs_put_bioc(bioc);
bio_endio(bio);
}
@@ -2326,13 +2308,6 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
ASSERT(i < rbio->real_stripes);
bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors);
-
- /*
- * We have already increased bio_counter when getting bioc, record it
- * so we can free it at rbio_orig_end_io().
- */
- rbio->generic_bio_cnt = 1;
-
return rbio;
}
@@ -2772,12 +2747,6 @@ raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc)
return NULL;
}
- /*
- * When we get bioc, we have already increased bio_counter, record it
- * so we can free it at rbio_orig_end_io()
- */
- rbio->generic_bio_cnt = 1;
-
return rbio;
}