summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Kconfig12
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/accessors.c2
-rw-r--r--fs/btrfs/backref.c26
-rw-r--r--fs/btrfs/backref.h4
-rw-r--r--fs/btrfs/bio.c54
-rw-r--r--fs/btrfs/bio.h2
-rw-r--r--fs/btrfs/block-group.c41
-rw-r--r--fs/btrfs/block-group.h2
-rw-r--r--fs/btrfs/btrfs_inode.h21
-rw-r--r--fs/btrfs/compression.c265
-rw-r--r--fs/btrfs/compression.h61
-rw-r--r--fs/btrfs/ctree.c135
-rw-r--r--fs/btrfs/defrag.c4
-rw-r--r--fs/btrfs/delayed-inode.c189
-rw-r--r--fs/btrfs/delayed-inode.h93
-rw-r--r--fs/btrfs/delayed-ref.c13
-rw-r--r--fs/btrfs/delayed-ref.h9
-rw-r--r--fs/btrfs/dev-replace.c12
-rw-r--r--fs/btrfs/direct-io.c12
-rw-r--r--fs/btrfs/disk-io.c99
-rw-r--r--fs/btrfs/disk-io.h3
-rw-r--r--fs/btrfs/export.c2
-rw-r--r--fs/btrfs/extent-io-tree.c4
-rw-r--r--fs/btrfs/extent-io-tree.h2
-rw-r--r--fs/btrfs/extent-tree.c104
-rw-r--r--fs/btrfs/extent-tree.h7
-rw-r--r--fs/btrfs/extent_io.c167
-rw-r--r--fs/btrfs/extent_io.h3
-rw-r--r--fs/btrfs/extent_map.c24
-rw-r--r--fs/btrfs/fiemap.c2
-rw-r--r--fs/btrfs/file-item.c60
-rw-r--r--fs/btrfs/file.c49
-rw-r--r--fs/btrfs/free-space-cache.c6
-rw-r--r--fs/btrfs/free-space-tree.c60
-rw-r--r--fs/btrfs/fs.c48
-rw-r--r--fs/btrfs/fs.h41
-rw-r--r--fs/btrfs/inode-item.c10
-rw-r--r--fs/btrfs/inode.c538
-rw-r--r--fs/btrfs/ioctl.c69
-rw-r--r--fs/btrfs/locking.c2
-rw-r--r--fs/btrfs/locking.h2
-rw-r--r--fs/btrfs/lzo.c93
-rw-r--r--fs/btrfs/messages.c1
-rw-r--r--fs/btrfs/messages.h1
-rw-r--r--fs/btrfs/misc.h49
-rw-r--r--fs/btrfs/print-tree.c256
-rw-r--r--fs/btrfs/qgroup.c50
-rw-r--r--fs/btrfs/raid-stripe-tree.c17
-rw-r--r--fs/btrfs/raid56.c121
-rw-r--r--fs/btrfs/raid56.h4
-rw-r--r--fs/btrfs/ref-verify.c12
-rw-r--r--fs/btrfs/ref-verify.h4
-rw-r--r--fs/btrfs/reflink.c15
-rw-r--r--fs/btrfs/relocation.c81
-rw-r--r--fs/btrfs/root-tree.c66
-rw-r--r--fs/btrfs/scrub.c95
-rw-r--r--fs/btrfs/scrub.h2
-rw-r--r--fs/btrfs/send.c373
-rw-r--r--fs/btrfs/space-info.c8
-rw-r--r--fs/btrfs/subpage.c2
-rw-r--r--fs/btrfs/subpage.h2
-rw-r--r--fs/btrfs/super.c66
-rw-r--r--fs/btrfs/sysfs.c16
-rw-r--r--fs/btrfs/tests/delayed-refs-tests.c4
-rw-r--r--fs/btrfs/tests/extent-map-tests.c2
-rw-r--r--fs/btrfs/transaction.c49
-rw-r--r--fs/btrfs/tree-checker.c43
-rw-r--r--fs/btrfs/tree-log.c1888
-rw-r--r--fs/btrfs/verity.c10
-rw-r--r--fs/btrfs/volumes.c75
-rw-r--r--fs/btrfs/volumes.h4
-rw-r--r--fs/btrfs/zlib.c86
-rw-r--r--fs/btrfs/zoned.c76
-rw-r--r--fs/btrfs/zoned.h9
-rw-r--r--fs/btrfs/zstd.c198
76 files changed, 3555 insertions, 2484 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index ea95c90c8474..4438637c8900 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -62,6 +62,7 @@ config BTRFS_FS_RUN_SANITY_TESTS
config BTRFS_DEBUG
bool "Btrfs debugging support"
depends on BTRFS_FS
+ select REF_TRACKER if STACKTRACE_SUPPORT
help
Enable run-time debugging support for the btrfs filesystem.
@@ -117,14 +118,3 @@ config BTRFS_EXPERIMENTAL
- large folio support
If unsure, say N.
-
-config BTRFS_FS_REF_VERIFY
- bool "Btrfs with the ref verify tool compiled in"
- depends on BTRFS_FS
- default n
- help
- Enable run-time extent reference verification instrumentation. This
- is meant to be used by btrfs developers for tracking down extent
- reference problems or verifying they didn't break something.
-
- If unsure, say N.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 2d5f0482678b..743d7677b175 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -36,7 +36,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
lru_cache.o raid-stripe-tree.o fiemap.o direct-io.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
-btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
+btrfs-$(CONFIG_BTRFS_DEBUG) += ref-verify.o
btrfs-$(CONFIG_BLK_DEV_ZONED) += zoned.o
btrfs-$(CONFIG_FS_VERITY) += verity.o
diff --git a/fs/btrfs/accessors.c b/fs/btrfs/accessors.c
index 861c7d92c437..1248aa2535d3 100644
--- a/fs/btrfs/accessors.c
+++ b/fs/btrfs/accessors.c
@@ -44,7 +44,7 @@ static __always_inline void memcpy_split_src(char *dest, const char *src1,
* gives us all the type checking.
*
* The extent buffer pages stored in the array folios may not form a contiguous
- * phyusical range, but the API functions assume the linear offset to the range
+ * physical range, but the API functions assume the linear offset to the range
* from 0 to metadata node size.
*/
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 6a450be293b1..2ab550a1e715 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -859,7 +859,7 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
free_pref(ref);
return PTR_ERR(eb);
}
- if (!extent_buffer_uptodate(eb)) {
+ if (unlikely(!extent_buffer_uptodate(eb))) {
free_pref(ref);
free_extent_buffer(eb);
return -EIO;
@@ -1062,7 +1062,7 @@ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_get_extent_inline_ref_type(leaf, iref,
BTRFS_REF_TYPE_ANY);
- if (type == BTRFS_REF_TYPE_INVALID)
+ if (unlikely(type == BTRFS_REF_TYPE_INVALID))
return -EUCLEAN;
offset = btrfs_extent_inline_ref_offset(leaf, iref);
@@ -1422,7 +1422,7 @@ again:
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
@@ -1614,7 +1614,7 @@ again:
ret = PTR_ERR(eb);
goto out;
}
- if (!extent_buffer_uptodate(eb)) {
+ if (unlikely(!extent_buffer_uptodate(eb))) {
free_extent_buffer(eb);
ret = -EIO;
goto out;
@@ -1652,7 +1652,7 @@ again:
* case.
*/
ASSERT(eie);
- if (!eie) {
+ if (unlikely(!eie)) {
ret = -EUCLEAN;
goto out;
}
@@ -1690,7 +1690,7 @@ out:
* @ctx->bytenr and @ctx->extent_item_pos. The bytenr of the found leaves are
* added to the ulist at @ctx->refs, and that ulist is allocated by this
* function. The caller should free the ulist with free_leaf_list() if
- * @ctx->ignore_extent_item_pos is false, otherwise a fimple ulist_free() is
+ * @ctx->ignore_extent_item_pos is false, otherwise a simple ulist_free() is
* enough.
*
* Returns 0 on success and < 0 on error. On error @ctx->refs is not allocated.
@@ -2215,7 +2215,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
@@ -2312,7 +2312,7 @@ static int get_extent_inline_ref(unsigned long *ptr,
*out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
*out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref,
BTRFS_REF_TYPE_ANY);
- if (*out_type == BTRFS_REF_TYPE_INVALID)
+ if (unlikely(*out_type == BTRFS_REF_TYPE_INVALID))
return -EUCLEAN;
*ptr += btrfs_extent_inline_ref_size(*out_type);
@@ -2868,7 +2868,7 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
@@ -2876,7 +2876,7 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
ret = -EUCLEAN;
goto release;
}
- if (path->slots[0] == 0) {
+ if (unlikely(path->slots[0] == 0)) {
DEBUG_WARN();
ret = -EUCLEAN;
goto release;
@@ -3457,7 +3457,7 @@ int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
if (ret < 0)
goto out;
/* No extra backref? This means the tree block is corrupted */
- if (ret > 0) {
+ if (unlikely(ret > 0)) {
ret = -EUCLEAN;
goto out;
}
@@ -3500,7 +3500,7 @@ int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
((unsigned long)iter->cur_ptr);
type = btrfs_get_extent_inline_ref_type(eb, iref,
BTRFS_REF_TYPE_BLOCK);
- if (type == BTRFS_REF_TYPE_INVALID) {
+ if (unlikely(type == BTRFS_REF_TYPE_INVALID)) {
ret = -EUCLEAN;
goto out;
}
@@ -3612,7 +3612,7 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
}
/* Sanity check, we shouldn't have any unchecked nodes */
- if (!upper->checked) {
+ if (unlikely(!upper->checked)) {
DEBUG_WARN("we should not have any unchecked nodes");
return -EUCLEAN;
}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 34b0193a181c..25d51c246070 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -190,7 +190,7 @@ struct btrfs_backref_share_check_ctx {
* It's very common to have several file extent items that point to the
* same extent (bytenr) but with different offsets and lengths. This
* typically happens for COW writes, partial writes into prealloc
- * extents, NOCOW writes after snapshoting a root, hole punching or
+ * extents, NOCOW writes after snapshotting a root, hole punching or
* reflinking within the same file (less common perhaps).
* So keep a small cache with the lookup results for the extent pointed
* by the last few file extent items. This cache is checked, with a
@@ -414,7 +414,7 @@ struct btrfs_backref_cache {
/*
* Whether this cache is for relocation
*
- * Reloction backref cache require more info for reloc root compared
+ * Relocation backref cache requires more info for reloc root compared
* to generic backref cache.
*/
bool is_reloc;
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 50b5fc1c06d7..21df48e6c4fa 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -93,6 +93,7 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
refcount_inc(&orig_bbio->ordered->refs);
bbio->ordered = orig_bbio->ordered;
}
+ bbio->csum_search_commit_root = orig_bbio->csum_search_commit_root;
atomic_inc(&orig_bbio->pending_ios);
return bbio;
}
@@ -166,7 +167,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
int mirror = repair_bbio->mirror_num;
if (repair_bbio->bio.bi_status ||
- !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) {
+ !btrfs_data_csum_ok(repair_bbio, dev, 0, bvec_phys(bv))) {
bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
repair_bbio->bio.bi_iter = repair_bbio->saved_iter;
@@ -203,18 +204,21 @@ done:
*/
static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
u32 bio_offset,
- struct bio_vec *bv,
+ phys_addr_t paddr,
struct btrfs_failed_bio *fbio)
{
struct btrfs_inode *inode = failed_bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct folio *folio = page_folio(phys_to_page(paddr));
const u32 sectorsize = fs_info->sectorsize;
+ const u32 foff = offset_in_folio(folio, paddr);
const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT);
struct btrfs_bio *repair_bbio;
struct bio *repair_bio;
int num_copies;
int mirror;
+ ASSERT(foff + sectorsize <= folio_size(folio));
btrfs_debug(fs_info, "repair read error: read error at %llu",
failed_bbio->file_offset + bio_offset);
@@ -237,7 +241,7 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,
&btrfs_repair_bioset);
repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector;
- __bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset);
+ bio_add_folio_nofail(repair_bio, folio, sectorsize, foff);
repair_bbio = btrfs_bio(repair_bio);
btrfs_bio_init(repair_bbio, fs_info, NULL, fbio);
@@ -258,6 +262,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
struct bvec_iter *iter = &bbio->saved_iter;
blk_status_t status = bbio->bio.bi_status;
struct btrfs_failed_bio *fbio = NULL;
+ phys_addr_t paddr;
u32 offset = 0;
/* Read-repair requires the inode field to be set by the submitter. */
@@ -275,17 +280,11 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
/* Clear the I/O error. A failed repair will reset it. */
bbio->bio.bi_status = BLK_STS_OK;
- while (iter->bi_size) {
- struct bio_vec bv = bio_iter_iovec(&bbio->bio, *iter);
-
- bv.bv_len = min(bv.bv_len, sectorsize);
- if (status || !btrfs_data_csum_ok(bbio, dev, offset, &bv))
- fbio = repair_one_sector(bbio, offset, &bv, fbio);
-
- bio_advance_iter_single(&bbio->bio, iter, sectorsize);
+ btrfs_bio_for_each_block(paddr, &bbio->bio, iter, fs_info->sectorsize) {
+ if (status || !btrfs_data_csum_ok(bbio, dev, offset, paddr))
+ fbio = repair_one_sector(bbio, offset, paddr, fbio);
offset += sectorsize;
}
-
if (bbio->csum != bbio->csum_inline)
kfree(bbio->csum);
@@ -780,11 +779,38 @@ end_bbio:
return true;
}
+static void assert_bbio_alignment(struct btrfs_bio *bbio)
+{
+#ifdef CONFIG_BTRFS_ASSERT
+ struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
+ const u32 blocksize = fs_info->sectorsize;
+
+ /* Metadata has no extra bs > ps alignment requirement. */
+ if (!is_data_bbio(bbio))
+ return;
+
+ bio_for_each_bvec(bvec, &bbio->bio, iter)
+ ASSERT(IS_ALIGNED(bvec.bv_offset, blocksize) &&
+ IS_ALIGNED(bvec.bv_len, blocksize),
+ "root=%llu inode=%llu logical=%llu length=%u index=%u bv_offset=%u bv_len=%u",
+ btrfs_root_id(bbio->inode->root),
+ btrfs_ino(bbio->inode),
+ bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT,
+ bbio->bio.bi_iter.bi_size, iter.bi_idx,
+ bvec.bv_offset,
+ bvec.bv_len);
+#endif
+}
+
void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
{
/* If bbio->inode is not populated, its file_offset must be 0. */
ASSERT(bbio->inode || bbio->file_offset == 0);
+ assert_bbio_alignment(bbio);
+
while (!btrfs_submit_chunk(bbio, mirror_num))
;
}
@@ -823,8 +849,8 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
if (ret < 0)
goto out_counter_dec;
- if (!smap.dev->bdev ||
- !test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state)) {
+ if (unlikely(!smap.dev->bdev ||
+ !test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state))) {
ret = -EIO;
goto out_counter_dec;
}
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index dc2eb43b7097..00883aea55d7 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -82,6 +82,8 @@ struct btrfs_bio {
/* Save the first error status of split bio. */
blk_status_t status;
+ /* Use the commit root to look up csums (data read bio only). */
+ bool csum_search_commit_root;
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_bio but relies on bio being last.
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 9bf282d2453c..5322ef2ae015 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1358,7 +1358,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
* data in this block group. That check should be done by relocation routine,
* not this function.
*/
-static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
+static int inc_block_group_ro(struct btrfs_block_group *cache, bool force)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
@@ -1795,7 +1795,14 @@ static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
bg1 = list_entry(a, struct btrfs_block_group, bg_list);
bg2 = list_entry(b, struct btrfs_block_group, bg_list);
- return bg1->used > bg2->used;
+ /*
+ * Some other task may be updating the ->used field concurrently, but it
+ * is not serious if we get a stale value or load/store tearing issues,
+ * as sorting the list of block groups to reclaim is not critical and an
+ * occasional imperfect order is ok. So silence KCSAN and avoid the
+ * overhead of locking or any other synchronization.
+ */
+ return data_race(bg1->used > bg2->used);
}
static inline bool btrfs_should_reclaim(const struct btrfs_fs_info *fs_info)
@@ -1964,7 +1971,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
* called, which is where we will transfer a reserved extent's
* size from the "reserved" counter to the "used" counter - this
* happens when running delayed references. When we relocate the
- * chunk below, relocation first flushes dellaloc, waits for
+ * chunk below, relocation first flushes delalloc, waits for
* ordered extent completion (which is where we create delayed
* references for data extents) and commits the current
* transaction (which runs delayed references), and only after
@@ -2031,7 +2038,7 @@ void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
btrfs_reclaim_sweep(fs_info);
spin_lock(&fs_info->unused_bgs_lock);
if (!list_empty(&fs_info->reclaim_bgs))
- queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
+ queue_work(system_dfl_wq, &fs_info->reclaim_bgs_work);
spin_unlock(&fs_info->unused_bgs_lock);
}
@@ -2064,7 +2071,7 @@ static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key
return -ENOENT;
}
- if (map->start != key->objectid || map->chunk_len != key->offset) {
+ if (unlikely(map->start != key->objectid || map->chunk_len != key->offset)) {
btrfs_err(fs_info,
"block group %llu len %llu mismatch with chunk %llu len %llu",
key->objectid, key->offset, map->start, map->chunk_len);
@@ -2077,7 +2084,7 @@ static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key
flags = btrfs_stack_block_group_flags(&bg) &
BTRFS_BLOCK_GROUP_TYPE_MASK;
- if (flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ if (unlikely(flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK))) {
btrfs_err(fs_info,
"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
key->objectid, key->offset, flags,
@@ -2238,7 +2245,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
return ret;
/* Shouldn't have super stripes in sequential zones */
- if (zoned && nr) {
+ if (unlikely(zoned && nr)) {
kfree(logical);
btrfs_err(fs_info,
"zoned: block group %llu must not contain super block",
@@ -2329,7 +2336,7 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
break;
bg = btrfs_lookup_block_group(fs_info, map->start);
- if (!bg) {
+ if (unlikely(!bg)) {
btrfs_err(fs_info,
"chunk start=%llu len=%llu doesn't have corresponding block group",
map->start, map->chunk_len);
@@ -2337,9 +2344,9 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
btrfs_free_chunk_map(map);
break;
}
- if (bg->start != map->start || bg->length != map->chunk_len ||
- (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
- (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ if (unlikely(bg->start != map->start || bg->length != map->chunk_len ||
+ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+ (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK))) {
btrfs_err(fs_info,
"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
map->start, map->chunk_len,
@@ -2832,7 +2839,7 @@ next:
* space or none at all (due to no need to COW, extent buffers
* were already COWed in the current transaction and still
* unwritten, tree heights lower than the maximum possible
- * height, etc). For data we generally reserve the axact amount
+ * height, etc). For data we generally reserve the exact amount
* of space we are going to allocate later, the exception is
* when using compression, as we must reserve space based on the
* uncompressed data size, because the compression is only done
@@ -3241,7 +3248,7 @@ again:
*/
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, BTRFS_I(inode));
- if (ret) {
+ if (unlikely(ret)) {
/*
* So theoretically we could recover from this, simply set the
* super cache generation to 0 so we know to invalidate the
@@ -3988,7 +3995,7 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
struct btrfs_space_info *sys_space_info;
sys_space_info = btrfs_find_space_info(trans->fs_info, sys_flags);
- if (!sys_space_info) {
+ if (unlikely(!sys_space_info)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
@@ -4002,17 +4009,17 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
}
ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
- } else if (ret) {
+ } else if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index a8bb8429c966..9172104a5889 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -63,7 +63,7 @@ enum btrfs_discard_state {
* CHUNK_ALLOC_FORCE means it must try to allocate one
*
* CHUNK_ALLOC_FORCE_FOR_EXTENT like CHUNK_ALLOC_FORCE but called from
- * find_free_extent() that also activaes the zone
+ * find_free_extent() that also activates the zone
*/
enum btrfs_chunk_alloc_enum {
CHUNK_ALLOC_NO_FORCE,
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 0387b9f43a52..af373d50a901 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -338,6 +338,11 @@ struct btrfs_inode {
struct list_head delayed_iput;
struct rw_semaphore i_mmap_lock;
+
+#ifdef CONFIG_FS_VERITY
+ struct fsverity_info *i_verity_info;
+#endif
+
struct inode vfs_inode;
};
@@ -532,9 +537,9 @@ static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode)
/* We only allow BITS_PER_LONGS blocks for each bitmap. */
#ifdef CONFIG_BTRFS_EXPERIMENTAL
- mapping_set_folio_order_range(inode->vfs_inode.i_mapping, 0,
- ilog2(((BITS_PER_LONG << inode->root->fs_info->sectorsize_bits)
- >> PAGE_SHIFT)));
+ mapping_set_folio_order_range(inode->vfs_inode.i_mapping,
+ inode->root->fs_info->block_min_order,
+ inode->root->fs_info->block_max_order);
#endif
}
@@ -542,10 +547,12 @@ static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode)
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
-int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum,
- const u8 * const csum_expected);
+void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr,
+ u8 *dest);
+int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
+ const u8 * const csum_expected);
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
- u32 bio_offset, struct bio_vec *bv);
+ u32 bio_offset, phys_addr_t paddr);
noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
struct btrfs_file_extent *file_extent,
bool nowait);
@@ -558,7 +565,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
const struct fscrypt_str *name);
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
- const struct fscrypt_str *name, int add_backref, u64 index);
+ const struct fscrypt_str *name, bool add_backref, u64 index);
int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry);
int btrfs_truncate_block(struct btrfs_inode *inode, u64 offset, u64 start, u64 end);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d09d622016ef..bacad18357b3 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -90,19 +90,19 @@ bool btrfs_compress_is_valid_type(const char *str, size_t len)
}
static int compression_compress_pages(int type, struct list_head *ws,
- struct address_space *mapping, u64 start,
+ struct btrfs_inode *inode, u64 start,
struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out)
{
switch (type) {
case BTRFS_COMPRESS_ZLIB:
- return zlib_compress_folios(ws, mapping, start, folios,
+ return zlib_compress_folios(ws, inode, start, folios,
out_folios, total_in, total_out);
case BTRFS_COMPRESS_LZO:
- return lzo_compress_folios(ws, mapping, start, folios,
+ return lzo_compress_folios(ws, inode, start, folios,
out_folios, total_in, total_out);
case BTRFS_COMPRESS_ZSTD:
- return zstd_compress_folios(ws, mapping, start, folios,
+ return zstd_compress_folios(ws, inode, start, folios,
out_folios, total_in, total_out);
case BTRFS_COMPRESS_NONE:
default:
@@ -223,10 +223,14 @@ static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_co
/*
* Common wrappers for page allocation from compression wrappers
*/
-struct folio *btrfs_alloc_compr_folio(void)
+struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info)
{
struct folio *folio = NULL;
+ /* For bs > ps cases, no cached folio pool for now. */
+ if (fs_info->block_min_order)
+ goto alloc;
+
spin_lock(&compr_pool.lock);
if (compr_pool.count > 0) {
folio = list_first_entry(&compr_pool.list, struct folio, lru);
@@ -238,13 +242,18 @@ struct folio *btrfs_alloc_compr_folio(void)
if (folio)
return folio;
- return folio_alloc(GFP_NOFS, 0);
+alloc:
+ return folio_alloc(GFP_NOFS, fs_info->block_min_order);
}
void btrfs_free_compr_folio(struct folio *folio)
{
bool do_free = false;
+ /* The folio is from bs > ps fs, no cached pool for now. */
+ if (folio_order(folio))
+ goto free;
+
spin_lock(&compr_pool.lock);
if (compr_pool.count > compr_pool.thresh) {
do_free = true;
@@ -257,6 +266,7 @@ void btrfs_free_compr_folio(struct folio *folio)
if (!do_free)
return;
+free:
ASSERT(folio_ref_count(folio) == 1);
folio_put(folio);
}
@@ -344,16 +354,19 @@ static void end_bbio_compressed_write(struct btrfs_bio *bbio)
static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
{
+ struct btrfs_fs_info *fs_info = cb->bbio.fs_info;
struct bio *bio = &cb->bbio.bio;
u32 offset = 0;
while (offset < cb->compressed_len) {
+ struct folio *folio;
int ret;
- u32 len = min_t(u32, cb->compressed_len - offset, PAGE_SIZE);
+ u32 len = min_t(u32, cb->compressed_len - offset,
+ btrfs_min_folio_size(fs_info));
+ folio = cb->compressed_folios[offset >> (PAGE_SHIFT + fs_info->block_min_order)];
/* Maximum compressed extent is smaller than bio size limit. */
- ret = bio_add_folio(bio, cb->compressed_folios[offset >> PAGE_SHIFT],
- len, 0);
+ ret = bio_add_folio(bio, folio, len, 0);
ASSERT(ret);
offset += len;
}
@@ -443,6 +456,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (fs_info->sectorsize < PAGE_SIZE)
return 0;
+ /* For bs > ps cases, we don't support readahead for compressed folios for now. */
+ if (fs_info->block_min_order)
+ return 0;
+
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
while (cur < compressed_end) {
@@ -602,17 +619,19 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
cb->compressed_len = compressed_len;
cb->compress_type = btrfs_extent_map_compression(em);
cb->orig_bbio = bbio;
+ cb->bbio.csum_search_commit_root = bbio->csum_search_commit_root;
btrfs_free_extent_map(em);
- cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
+ cb->nr_folios = DIV_ROUND_UP(compressed_len, btrfs_min_folio_size(fs_info));
cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS);
if (!cb->compressed_folios) {
status = BLK_STS_RESOURCE;
goto out_free_bio;
}
- ret = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios);
+ ret = btrfs_alloc_folio_array(cb->nr_folios, fs_info->block_min_order,
+ cb->compressed_folios);
if (ret) {
status = BLK_STS_RESOURCE;
goto out_free_compressed_pages;
@@ -687,8 +706,6 @@ struct heuristic_ws {
struct list_head list;
};
-static struct workspace_manager heuristic_wsm;
-
static void free_heuristic_ws(struct list_head *ws)
{
struct heuristic_ws *workspace;
@@ -701,7 +718,7 @@ static void free_heuristic_ws(struct list_head *ws)
kfree(workspace);
}
-static struct list_head *alloc_heuristic_ws(void)
+static struct list_head *alloc_heuristic_ws(struct btrfs_fs_info *fs_info)
{
struct heuristic_ws *ws;
@@ -728,11 +745,9 @@ fail:
return ERR_PTR(-ENOMEM);
}
-const struct btrfs_compress_op btrfs_heuristic_compress = {
- .workspace_manager = &heuristic_wsm,
-};
+const struct btrfs_compress_levels btrfs_heuristic_compress = { 0 };
-static const struct btrfs_compress_op * const btrfs_compress_op[] = {
+static const struct btrfs_compress_levels * const btrfs_compress_levels[] = {
/* The heuristic is represented as compression type 0 */
&btrfs_heuristic_compress,
&btrfs_zlib_compress,
@@ -740,13 +755,13 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
&btrfs_zstd_compress,
};
-static struct list_head *alloc_workspace(int type, int level)
+static struct list_head *alloc_workspace(struct btrfs_fs_info *fs_info, int type, int level)
{
switch (type) {
- case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws();
- case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level);
- case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace();
- case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level);
+ case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(fs_info);
+ case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(fs_info, level);
+ case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(fs_info);
+ case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(fs_info, level);
default:
/*
* This can't happen, the type is validated several times
@@ -772,44 +787,58 @@ static void free_workspace(int type, struct list_head *ws)
}
}
-static void btrfs_init_workspace_manager(int type)
+static int alloc_workspace_manager(struct btrfs_fs_info *fs_info,
+ enum btrfs_compression_type type)
{
- struct workspace_manager *wsm;
+ struct workspace_manager *gwsm;
struct list_head *workspace;
- wsm = btrfs_compress_op[type]->workspace_manager;
- INIT_LIST_HEAD(&wsm->idle_ws);
- spin_lock_init(&wsm->ws_lock);
- atomic_set(&wsm->total_ws, 0);
- init_waitqueue_head(&wsm->ws_wait);
+ ASSERT(fs_info->compr_wsm[type] == NULL);
+ gwsm = kzalloc(sizeof(*gwsm), GFP_KERNEL);
+ if (!gwsm)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&gwsm->idle_ws);
+ spin_lock_init(&gwsm->ws_lock);
+ atomic_set(&gwsm->total_ws, 0);
+ init_waitqueue_head(&gwsm->ws_wait);
+ fs_info->compr_wsm[type] = gwsm;
/*
* Preallocate one workspace for each compression type so we can
* guarantee forward progress in the worst case
*/
- workspace = alloc_workspace(type, 0);
+ workspace = alloc_workspace(fs_info, type, 0);
if (IS_ERR(workspace)) {
- btrfs_warn(NULL,
- "cannot preallocate compression workspace, will try later");
+ btrfs_warn(fs_info,
+ "cannot preallocate compression workspace for %s, will try later",
+ btrfs_compress_type2str(type));
} else {
- atomic_set(&wsm->total_ws, 1);
- wsm->free_ws = 1;
- list_add(workspace, &wsm->idle_ws);
+ atomic_set(&gwsm->total_ws, 1);
+ gwsm->free_ws = 1;
+ list_add(workspace, &gwsm->idle_ws);
}
+ return 0;
}
-static void btrfs_cleanup_workspace_manager(int type)
+static void free_workspace_manager(struct btrfs_fs_info *fs_info,
+ enum btrfs_compression_type type)
{
- struct workspace_manager *wsman;
struct list_head *ws;
+ struct workspace_manager *gwsm = fs_info->compr_wsm[type];
- wsman = btrfs_compress_op[type]->workspace_manager;
- while (!list_empty(&wsman->idle_ws)) {
- ws = wsman->idle_ws.next;
+ /* ZSTD uses its own workspace manager, should not enter here. */
+ ASSERT(type != BTRFS_COMPRESS_ZSTD && type < BTRFS_NR_COMPRESS_TYPES);
+ if (!gwsm)
+ return;
+ fs_info->compr_wsm[type] = NULL;
+ while (!list_empty(&gwsm->idle_ws)) {
+ ws = gwsm->idle_ws.next;
list_del(ws);
free_workspace(type, ws);
- atomic_dec(&wsman->total_ws);
+ atomic_dec(&gwsm->total_ws);
}
+ kfree(gwsm);
}
/*
@@ -818,9 +847,9 @@ static void btrfs_cleanup_workspace_manager(int type)
* Preallocation makes a forward progress guarantees and we do not return
* errors.
*/
-struct list_head *btrfs_get_workspace(int type, int level)
+struct list_head *btrfs_get_workspace(struct btrfs_fs_info *fs_info, int type, int level)
{
- struct workspace_manager *wsm;
+ struct workspace_manager *wsm = fs_info->compr_wsm[type];
struct list_head *workspace;
int cpus = num_online_cpus();
unsigned nofs_flag;
@@ -830,7 +859,7 @@ struct list_head *btrfs_get_workspace(int type, int level)
wait_queue_head_t *ws_wait;
int *free_ws;
- wsm = btrfs_compress_op[type]->workspace_manager;
+ ASSERT(wsm);
idle_ws = &wsm->idle_ws;
ws_lock = &wsm->ws_lock;
total_ws = &wsm->total_ws;
@@ -866,7 +895,7 @@ again:
* context of btrfs_compress_bio/btrfs_compress_pages
*/
nofs_flag = memalloc_nofs_save();
- workspace = alloc_workspace(type, level);
+ workspace = alloc_workspace(fs_info, type, level);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(workspace)) {
@@ -889,7 +918,7 @@ again:
/* no burst */ 1);
if (__ratelimit(&_rs))
- btrfs_warn(NULL,
+ btrfs_warn(fs_info,
"no compression workspaces, low memory, retrying");
}
goto again;
@@ -897,13 +926,13 @@ again:
return workspace;
}
-static struct list_head *get_workspace(int type, int level)
+static struct list_head *get_workspace(struct btrfs_fs_info *fs_info, int type, int level)
{
switch (type) {
- case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level);
- case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level);
- case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(type, level);
- case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level);
+ case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(fs_info, type, level);
+ case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(fs_info, level);
+ case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(fs_info, type, level);
+ case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(fs_info, level);
default:
/*
* This can't happen, the type is validated several times
@@ -917,21 +946,21 @@ static struct list_head *get_workspace(int type, int level)
* put a workspace struct back on the list or free it if we have enough
* idle ones sitting around
*/
-void btrfs_put_workspace(int type, struct list_head *ws)
+void btrfs_put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws)
{
- struct workspace_manager *wsm;
+ struct workspace_manager *gwsm = fs_info->compr_wsm[type];
struct list_head *idle_ws;
spinlock_t *ws_lock;
atomic_t *total_ws;
wait_queue_head_t *ws_wait;
int *free_ws;
- wsm = btrfs_compress_op[type]->workspace_manager;
- idle_ws = &wsm->idle_ws;
- ws_lock = &wsm->ws_lock;
- total_ws = &wsm->total_ws;
- ws_wait = &wsm->ws_wait;
- free_ws = &wsm->free_ws;
+ ASSERT(gwsm);
+ idle_ws = &gwsm->idle_ws;
+ ws_lock = &gwsm->ws_lock;
+ total_ws = &gwsm->total_ws;
+ ws_wait = &gwsm->ws_wait;
+ free_ws = &gwsm->free_ws;
spin_lock(ws_lock);
if (*free_ws <= num_online_cpus()) {
@@ -948,13 +977,13 @@ wake:
cond_wake_up(ws_wait);
}
-static void put_workspace(int type, struct list_head *ws)
+static void put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws)
{
switch (type) {
- case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(type, ws);
- case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(type, ws);
- case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(type, ws);
- case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(ws);
+ case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(fs_info, type, ws);
+ case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(fs_info, type, ws);
+ case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(fs_info, type, ws);
+ case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(fs_info, ws);
default:
/*
* This can't happen, the type is validated several times
@@ -970,12 +999,12 @@ static void put_workspace(int type, struct list_head *ws)
*/
static int btrfs_compress_set_level(unsigned int type, int level)
{
- const struct btrfs_compress_op *ops = btrfs_compress_op[type];
+ const struct btrfs_compress_levels *levels = btrfs_compress_levels[type];
if (level == 0)
- level = ops->default_level;
+ level = levels->default_level;
else
- level = clamp(level, ops->min_level, ops->max_level);
+ level = clamp(level, levels->min_level, levels->max_level);
return level;
}
@@ -985,9 +1014,9 @@ static int btrfs_compress_set_level(unsigned int type, int level)
*/
bool btrfs_compress_level_valid(unsigned int type, int level)
{
- const struct btrfs_compress_op *ops = btrfs_compress_op[type];
+ const struct btrfs_compress_levels *levels = btrfs_compress_levels[type];
- return ops->min_level <= level && level <= ops->max_level;
+ return levels->min_level <= level && level <= levels->max_level;
}
/* Wrapper around find_get_page(), with extra error message. */
@@ -1022,44 +1051,46 @@ int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
* - compression algo are 0-3
* - the level are bits 4-7
*
- * @out_pages is an in/out parameter, holds maximum number of pages to allocate
- * and returns number of actually allocated pages
+ * @out_folios is an in/out parameter, holds maximum number of folios to allocate
+ * and returns number of actually allocated folios
*
* @total_in is used to return the number of bytes actually read. It
* may be smaller than the input length if we had to exit early because we
- * ran out of room in the pages array or because we cross the
+ * ran out of room in the folios array or because we cross the
* max_out threshold.
*
* @total_out is an in/out parameter, must be set to the input length and will
* be also used to return the total number of compressed bytes
*/
-int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping,
+int btrfs_compress_folios(unsigned int type, int level, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out)
{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
const unsigned long orig_len = *total_out;
struct list_head *workspace;
int ret;
level = btrfs_compress_set_level(type, level);
- workspace = get_workspace(type, level);
- ret = compression_compress_pages(type, workspace, mapping, start, folios,
+ workspace = get_workspace(fs_info, type, level);
+ ret = compression_compress_pages(type, workspace, inode, start, folios,
out_folios, total_in, total_out);
/* The total read-in bytes should be no larger than the input. */
ASSERT(*total_in <= orig_len);
- put_workspace(type, workspace);
+ put_workspace(fs_info, type, workspace);
return ret;
}
static int btrfs_decompress_bio(struct compressed_bio *cb)
{
+ struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
struct list_head *workspace;
int ret;
int type = cb->compress_type;
- workspace = get_workspace(type, 0);
+ workspace = get_workspace(fs_info, type, 0);
ret = compression_decompress_bio(workspace, cb);
- put_workspace(type, workspace);
+ put_workspace(fs_info, type, workspace);
if (!ret)
zero_fill_bio(&cb->orig_bbio->bio);
@@ -1080,20 +1111,50 @@ int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
int ret;
/*
- * The full destination page range should not exceed the page size.
+ * The full destination folio range should not exceed the folio size.
* And the @destlen should not exceed sectorsize, as this is only called for
* inline file extents, which should not exceed sectorsize.
*/
- ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize);
+ ASSERT(dest_pgoff + destlen <= folio_size(dest_folio) && destlen <= sectorsize);
- workspace = get_workspace(type, 0);
+ workspace = get_workspace(fs_info, type, 0);
ret = compression_decompress(type, workspace, data_in, dest_folio,
dest_pgoff, srclen, destlen);
- put_workspace(type, workspace);
+ put_workspace(fs_info, type, workspace);
return ret;
}
+int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info)
+{
+ int ret;
+
+ ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_NONE);
+ if (ret < 0)
+ goto error;
+ ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_ZLIB);
+ if (ret < 0)
+ goto error;
+ ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_LZO);
+ if (ret < 0)
+ goto error;
+ ret = zstd_alloc_workspace_manager(fs_info);
+ if (ret < 0)
+ goto error;
+ return 0;
+error:
+ btrfs_free_compress_wsm(fs_info);
+ return ret;
+}
+
+void btrfs_free_compress_wsm(struct btrfs_fs_info *fs_info)
+{
+ free_workspace_manager(fs_info, BTRFS_COMPRESS_NONE);
+ free_workspace_manager(fs_info, BTRFS_COMPRESS_ZLIB);
+ free_workspace_manager(fs_info, BTRFS_COMPRESS_LZO);
+ zstd_free_workspace_manager(fs_info);
+}
+
int __init btrfs_init_compress(void)
{
if (bioset_init(&btrfs_compressed_bioset, BIO_POOL_SIZE,
@@ -1105,11 +1166,6 @@ int __init btrfs_init_compress(void)
if (!compr_pool.shrinker)
return -ENOMEM;
- btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
- btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
- btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
- zstd_init_workspace_manager();
-
spin_lock_init(&compr_pool.lock);
INIT_LIST_HEAD(&compr_pool.list);
compr_pool.count = 0;
@@ -1130,10 +1186,6 @@ void __cold btrfs_exit_compress(void)
btrfs_compr_pool_scan(NULL, NULL);
shrinker_free(compr_pool.shrinker);
- btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE);
- btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB);
- btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO);
- zstd_cleanup_workspace_manager();
bioset_exit(&btrfs_compressed_bioset);
}
@@ -1256,7 +1308,7 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
#define ENTROPY_LVL_HIGH (80)
/*
- * For increasead precision in shannon_entropy calculation,
+ * For increased precision in shannon_entropy calculation,
* let's do pow(n, M) to save more digits after comma:
*
* - maximum int bit length is 64
@@ -1542,7 +1594,8 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
*/
int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end)
{
- struct list_head *ws_list = get_workspace(0, 0);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct list_head *ws_list = get_workspace(fs_info, 0, 0);
struct heuristic_ws *ws;
u32 i;
u8 byte;
@@ -1611,30 +1664,34 @@ int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end)
}
out:
- put_workspace(0, ws_list);
+ put_workspace(fs_info, 0, ws_list);
return ret;
}
/*
- * Convert the compression suffix (eg. after "zlib" starting with ":") to
- * level, unrecognized string will set the default level. Negative level
- * numbers are allowed.
+ * Convert the compression suffix (eg. after "zlib" starting with ":") to level.
+ *
+ * If the resulting level exceeds the algo's supported levels, it will be clamped.
+ *
+ * Return <0 if no valid string can be found.
+ * Return 0 if everything is fine.
*/
-int btrfs_compress_str2level(unsigned int type, const char *str)
+int btrfs_compress_str2level(unsigned int type, const char *str, int *level_ret)
{
int level = 0;
int ret;
- if (!type)
+ if (!type) {
+ *level_ret = btrfs_compress_set_level(type, level);
return 0;
+ }
if (str[0] == ':') {
ret = kstrtoint(str + 1, 10, &level);
if (ret)
- level = 0;
+ return ret;
}
- level = btrfs_compress_set_level(type, level);
-
- return level;
+ *level_ret = btrfs_compress_set_level(type, level);
+ return 0;
}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 1b38e707bbd9..eba188a9e3bb 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -75,6 +75,11 @@ struct compressed_bio {
struct btrfs_bio bbio;
};
+static inline struct btrfs_fs_info *cb_to_fs_info(const struct compressed_bio *cb)
+{
+ return cb->bbio.fs_info;
+}
+
/* @range_end must be exclusive. */
static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u64 cur)
{
@@ -84,11 +89,14 @@ static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u6
return min(range_end, folio_end(folio)) - cur;
}
+int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info);
+void btrfs_free_compress_wsm(struct btrfs_fs_info *fs_info);
+
int __init btrfs_init_compress(void);
void __cold btrfs_exit_compress(void);
bool btrfs_compress_level_valid(unsigned int type, int level);
-int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping,
+int btrfs_compress_folios(unsigned int type, int level, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
@@ -102,21 +110,11 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
bool writeback);
void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
-int btrfs_compress_str2level(unsigned int type, const char *str);
+int btrfs_compress_str2level(unsigned int type, const char *str, int *level_ret);
-struct folio *btrfs_alloc_compr_folio(void);
+struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info);
void btrfs_free_compr_folio(struct folio *folio);
-enum btrfs_compression_type {
- BTRFS_COMPRESS_NONE = 0,
- BTRFS_COMPRESS_ZLIB = 1,
- BTRFS_COMPRESS_LZO = 2,
- BTRFS_COMPRESS_ZSTD = 3,
- BTRFS_NR_COMPRESS_TYPES = 4,
-
- BTRFS_DEFRAG_DONT_COMPRESS,
-};
-
struct workspace_manager {
struct list_head idle_ws;
spinlock_t ws_lock;
@@ -128,11 +126,10 @@ struct workspace_manager {
wait_queue_head_t ws_wait;
};
-struct list_head *btrfs_get_workspace(int type, int level);
-void btrfs_put_workspace(int type, struct list_head *ws);
+struct list_head *btrfs_get_workspace(struct btrfs_fs_info *fs_info, int type, int level);
+void btrfs_put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws);
-struct btrfs_compress_op {
- struct workspace_manager *workspace_manager;
+struct btrfs_compress_levels {
/* Maximum level supported by the compression algorithm */
int min_level;
int max_level;
@@ -142,10 +139,10 @@ struct btrfs_compress_op {
/* The heuristic workspaces are managed via the 0th workspace manager */
#define BTRFS_NR_WORKSPACE_MANAGERS BTRFS_NR_COMPRESS_TYPES
-extern const struct btrfs_compress_op btrfs_heuristic_compress;
-extern const struct btrfs_compress_op btrfs_zlib_compress;
-extern const struct btrfs_compress_op btrfs_lzo_compress;
-extern const struct btrfs_compress_op btrfs_zstd_compress;
+extern const struct btrfs_compress_levels btrfs_heuristic_compress;
+extern const struct btrfs_compress_levels btrfs_zlib_compress;
+extern const struct btrfs_compress_levels btrfs_lzo_compress;
+extern const struct btrfs_compress_levels btrfs_zstd_compress;
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
bool btrfs_compress_is_valid_type(const char *str, size_t len);
@@ -155,39 +152,39 @@ int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end);
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
struct folio **in_folio_ret);
-int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
+int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zlib_decompress(struct list_head *ws, const u8 *data_in,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
-struct list_head *zlib_alloc_workspace(unsigned int level);
+struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level);
void zlib_free_workspace(struct list_head *ws);
-struct list_head *zlib_get_workspace(unsigned int level);
+struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level);
-int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
+int lzo_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int lzo_decompress(struct list_head *ws, const u8 *data_in,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
-struct list_head *lzo_alloc_workspace(void);
+struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info);
void lzo_free_workspace(struct list_head *ws);
-int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
+int zstd_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zstd_decompress(struct list_head *ws, const u8 *data_in,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
-void zstd_init_workspace_manager(void);
-void zstd_cleanup_workspace_manager(void);
-struct list_head *zstd_alloc_workspace(int level);
+int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info);
+void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info);
+struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level);
void zstd_free_workspace(struct list_head *ws);
-struct list_head *zstd_get_workspace(int level);
-void zstd_put_workspace(struct list_head *ws);
+struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level);
+void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws);
#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 74e6d7f3d266..561658aca018 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -30,10 +30,10 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *ins_key, struct btrfs_path *path,
- int data_size, int extend);
+ int data_size, bool extend);
static int push_node_left(struct btrfs_trans_handle *trans,
struct extent_buffer *dst,
- struct extent_buffer *src, int empty);
+ struct extent_buffer *src, bool empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
@@ -293,11 +293,11 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_inc_ref(trans, root, cow, 1);
- if (ret)
+ if (unlikely(ret))
btrfs_abort_transaction(trans, ret);
} else {
ret = btrfs_inc_ref(trans, root, cow, 0);
- if (ret)
+ if (unlikely(ret))
btrfs_abort_transaction(trans, ret);
}
if (ret) {
@@ -536,14 +536,14 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
@@ -556,7 +556,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
parent_start = buf->start;
ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
@@ -567,7 +567,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
parent_start, last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
@@ -575,7 +575,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
WARN_ON(trans->transid != btrfs_header_generation(parent));
ret = btrfs_tree_mod_log_insert_key(parent, parent_slot,
BTRFS_MOD_LOG_KEY_REPLACE);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
@@ -586,14 +586,14 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(trans, parent);
if (last_ref) {
ret = btrfs_tree_mod_log_free_eb(buf);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
}
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
parent_start, last_ref);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
@@ -613,15 +613,12 @@ error_unlock_cow:
return ret;
}
-static inline int should_cow_block(const struct btrfs_trans_handle *trans,
- const struct btrfs_root *root,
- const struct extent_buffer *buf)
+static inline bool should_cow_block(const struct btrfs_trans_handle *trans,
+ const struct btrfs_root *root,
+ const struct extent_buffer *buf)
{
if (btrfs_is_testing(root->fs_info))
- return 0;
-
- /* Ensure we can see the FORCE_COW bit */
- smp_mb__before_atomic();
+ return false;
/*
* We do not need to cow a block if
@@ -634,13 +631,25 @@ static inline int should_cow_block(const struct btrfs_trans_handle *trans,
* after we've finished copying src root, we must COW the shared
* block to ensure the metadata consistency.
*/
- if (btrfs_header_generation(buf) == trans->transid &&
- !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
- !(btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
- btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
- !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
- return 0;
- return 1;
+
+ if (btrfs_header_generation(buf) != trans->transid)
+ return true;
+
+ if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN))
+ return true;
+
+ /* Ensure we can see the FORCE_COW bit. */
+ smp_mb__before_atomic();
+ if (test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
+ return true;
+
+ if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
+ return false;
+
+ if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
+ return true;
+
+ return false;
}
/*
@@ -844,7 +853,7 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
&check);
if (IS_ERR(eb))
return eb;
- if (!extent_buffer_uptodate(eb)) {
+ if (unlikely(!extent_buffer_uptodate(eb))) {
free_extent_buffer(eb);
return ERR_PTR(-EIO);
}
@@ -913,7 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
}
ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_tree_unlock(child);
free_extent_buffer(child);
btrfs_abort_transaction(trans, ret);
@@ -935,7 +944,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
/* once for the root ptr */
free_extent_buffer_stale(mid);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1010,7 +1019,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
right, 0, 1);
free_extent_buffer_stale(right);
right = NULL;
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1019,7 +1028,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_node_key(right, &right_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
BTRFS_MOD_LOG_KEY_REPLACE);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1071,7 +1080,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
free_extent_buffer_stale(mid);
mid = NULL;
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1081,7 +1090,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_node_key(mid, &mid_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
BTRFS_MOD_LOG_KEY_REPLACE);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1186,7 +1195,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_node_key(mid, &disk_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
BTRFS_MOD_LOG_KEY_REPLACE);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_tree_unlock(left);
free_extent_buffer(left);
btrfs_abort_transaction(trans, ret);
@@ -1246,7 +1255,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_node_key(right, &disk_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
BTRFS_MOD_LOG_KEY_REPLACE);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_tree_unlock(right);
free_extent_buffer(right);
btrfs_abort_transaction(trans, ret);
@@ -1484,13 +1493,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
reada_for_search(fs_info, p, parent_level, slot, key->objectid);
/* first we do an atomic uptodate check */
- if (btrfs_buffer_uptodate(tmp, check.transid, 1) > 0) {
+ if (btrfs_buffer_uptodate(tmp, check.transid, true) > 0) {
/*
* Do extra check for first_key, eb can be stale due to
* being cached, read from scrub, or have multiple
* parents (shared tree blocks).
*/
- if (btrfs_verify_level_key(tmp, &check)) {
+ if (unlikely(btrfs_verify_level_key(tmp, &check))) {
ret = -EUCLEAN;
goto out;
}
@@ -1571,7 +1580,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
* and give up so that our caller doesn't loop forever
* on our EAGAINs.
*/
- if (!extent_buffer_uptodate(tmp)) {
+ if (unlikely(!extent_buffer_uptodate(tmp))) {
ret = -EIO;
goto out;
}
@@ -1752,7 +1761,7 @@ out:
* The root may have failed to write out at some point, and thus is no
* longer valid, return an error in this case.
*/
- if (!extent_buffer_uptodate(b)) {
+ if (unlikely(!extent_buffer_uptodate(b))) {
if (root_lock)
btrfs_tree_unlock_rw(b, root_lock);
free_extent_buffer(b);
@@ -2260,7 +2269,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
again:
b = btrfs_get_old_root(root, time_seq);
- if (!b) {
+ if (unlikely(!b)) {
ret = -EIO;
goto done;
}
@@ -2686,7 +2695,7 @@ static bool check_sibling_keys(const struct extent_buffer *left,
*/
static int push_node_left(struct btrfs_trans_handle *trans,
struct extent_buffer *dst,
- struct extent_buffer *src, int empty)
+ struct extent_buffer *src, bool empty)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int push_items = 0;
@@ -2722,13 +2731,13 @@ static int push_node_left(struct btrfs_trans_handle *trans,
push_items = min(src_nritems - 8, push_items);
/* dst is the left eb, src is the middle eb */
- if (check_sibling_keys(dst, src)) {
+ if (unlikely(check_sibling_keys(dst, src))) {
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -2796,7 +2805,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
push_items = max_push;
/* dst is the right eb, src is the middle eb */
- if (check_sibling_keys(src, dst)) {
+ if (unlikely(check_sibling_keys(src, dst))) {
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
return ret;
@@ -2813,7 +2822,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
push_items);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -2883,7 +2892,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_clear_buffer_dirty(trans, c);
ret2 = btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1);
- if (ret2 < 0)
+ if (unlikely(ret2 < 0))
btrfs_abort_transaction(trans, ret2);
btrfs_tree_unlock(c);
free_extent_buffer(c);
@@ -2928,7 +2937,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans,
if (level) {
ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
slot, nritems - slot);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -2941,7 +2950,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans,
if (level) {
ret = btrfs_tree_mod_log_insert_key(lower, slot,
BTRFS_MOD_LOG_KEY_ADD);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -3017,7 +3026,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
ASSERT(btrfs_header_level(c) == level);
ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_tree_unlock(split);
free_extent_buffer(split);
btrfs_abort_transaction(trans, ret);
@@ -3086,7 +3095,7 @@ int btrfs_leaf_free_space(const struct extent_buffer *leaf)
int ret;
ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_crit(fs_info,
"leaf free space ret %d, leaf data size %lu, used %d nritems %d",
ret,
@@ -3102,7 +3111,7 @@ int btrfs_leaf_free_space(const struct extent_buffer *leaf)
*/
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
- int data_size, int empty,
+ int data_size, bool empty,
struct extent_buffer *right,
int free_space, u32 left_nritems,
u32 min_slot)
@@ -3239,7 +3248,7 @@ out_unlock:
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path,
int min_data_size, int data_size,
- int empty, u32 min_slot)
+ bool empty, u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
@@ -3278,7 +3287,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
- if (check_sibling_keys(left, right)) {
+ if (unlikely(check_sibling_keys(left, right))) {
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
btrfs_tree_unlock(right);
@@ -3316,7 +3325,7 @@ out_unlock:
*/
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
struct btrfs_path *path, int data_size,
- int empty, struct extent_buffer *left,
+ bool empty, struct extent_buffer *left,
int free_space, u32 right_nritems,
u32 max_slot)
{
@@ -3494,7 +3503,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- if (check_sibling_keys(left, right)) {
+ if (unlikely(check_sibling_keys(left, right))) {
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3642,7 +3651,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const struct btrfs_key *ins_key,
struct btrfs_path *path, int data_size,
- int extend)
+ bool extend)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *l;
@@ -4075,7 +4084,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
btrfs_set_item_size(leaf, slot, new_size);
btrfs_mark_buffer_dirty(trans, leaf);
- if (btrfs_leaf_free_space(leaf) < 0) {
+ if (unlikely(btrfs_leaf_free_space(leaf) < 0)) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4108,7 +4117,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
old_data = btrfs_item_data_end(leaf, slot);
BUG_ON(slot < 0);
- if (slot >= nritems) {
+ if (unlikely(slot >= nritems)) {
btrfs_print_leaf(leaf);
btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
slot, nritems);
@@ -4135,7 +4144,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
btrfs_set_item_size(leaf, slot, old_size + data_size);
btrfs_mark_buffer_dirty(trans, leaf);
- if (btrfs_leaf_free_space(leaf) < 0) {
+ if (unlikely(btrfs_leaf_free_space(leaf) < 0)) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4183,7 +4192,7 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans,
data_end = leaf_data_end(leaf);
total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
- if (btrfs_leaf_free_space(leaf) < total_size) {
+ if (unlikely(btrfs_leaf_free_space(leaf) < total_size)) {
btrfs_print_leaf(leaf);
btrfs_crit(fs_info, "not enough freespace need %u have %d",
total_size, btrfs_leaf_free_space(leaf));
@@ -4193,7 +4202,7 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans,
if (slot != nritems) {
unsigned int old_data = btrfs_item_data_end(leaf, slot);
- if (old_data < data_end) {
+ if (unlikely(old_data < data_end)) {
btrfs_print_leaf(leaf);
btrfs_crit(fs_info,
"item at slot %d with data offset %u beyond data end of leaf %u",
@@ -4232,7 +4241,7 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(leaf, nritems + batch->nr);
btrfs_mark_buffer_dirty(trans, leaf);
- if (btrfs_leaf_free_space(leaf) < 0) {
+ if (unlikely(btrfs_leaf_free_space(leaf) < 0)) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4374,7 +4383,7 @@ int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (level) {
ret = btrfs_tree_mod_log_insert_move(parent, slot,
slot + 1, nritems - slot - 1);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -4387,7 +4396,7 @@ int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
} else if (level) {
ret = btrfs_tree_mod_log_insert_key(parent, slot,
BTRFS_MOD_LOG_KEY_REMOVE);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 738179a5e170..7b277934f66f 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -153,7 +153,7 @@ void btrfs_add_inode_defrag(struct btrfs_inode *inode, u32 extent_thresh)
}
/*
- * Pick the defragable inode that we want, if it doesn't exist, we will get the
+ * Pick the defraggable inode that we want, if it doesn't exist, we will get the
* next one.
*/
static struct inode_defrag *btrfs_pick_defrag_inode(
@@ -924,7 +924,7 @@ again:
folio_put(folio);
goto again;
}
- if (!folio_test_uptodate(folio)) {
+ if (unlikely(!folio_test_uptodate(folio))) {
folio_unlock(folio);
folio_put(folio);
return ERR_PTR(-EIO);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0f8d8e275143..41e37f7f67cc 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -57,6 +57,7 @@ static inline void btrfs_init_delayed_node(
delayed_node->root = root;
delayed_node->inode_id = inode_id;
refcount_set(&delayed_node->refs, 0);
+ btrfs_delayed_node_ref_tracker_dir_init(delayed_node);
delayed_node->ins_root = RB_ROOT_CACHED;
delayed_node->del_root = RB_ROOT_CACHED;
mutex_init(&delayed_node->mutex);
@@ -65,7 +66,8 @@ static inline void btrfs_init_delayed_node(
}
static struct btrfs_delayed_node *btrfs_get_delayed_node(
- struct btrfs_inode *btrfs_inode)
+ struct btrfs_inode *btrfs_inode,
+ struct btrfs_ref_tracker *tracker)
{
struct btrfs_root *root = btrfs_inode->root;
u64 ino = btrfs_ino(btrfs_inode);
@@ -74,6 +76,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
node = READ_ONCE(btrfs_inode->delayed_node);
if (node) {
refcount_inc(&node->refs);
+ btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_NOFS);
return node;
}
@@ -83,6 +86,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
if (node) {
if (btrfs_inode->delayed_node) {
refcount_inc(&node->refs); /* can be accessed */
+ btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
BUG_ON(btrfs_inode->delayed_node != node);
xa_unlock(&root->delayed_nodes);
return node;
@@ -106,6 +110,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
*/
if (refcount_inc_not_zero(&node->refs)) {
refcount_inc(&node->refs);
+ btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
+ btrfs_delayed_node_ref_tracker_alloc(node, &node->inode_cache_tracker,
+ GFP_ATOMIC);
btrfs_inode->delayed_node = node;
} else {
node = NULL;
@@ -126,7 +133,8 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
* Return the delayed node, or error pointer on failure.
*/
static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
- struct btrfs_inode *btrfs_inode)
+ struct btrfs_inode *btrfs_inode,
+ struct btrfs_ref_tracker *tracker)
{
struct btrfs_delayed_node *node;
struct btrfs_root *root = btrfs_inode->root;
@@ -135,7 +143,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
void *ptr;
again:
- node = btrfs_get_delayed_node(btrfs_inode);
+ node = btrfs_get_delayed_node(btrfs_inode, tracker);
if (node)
return node;
@@ -144,12 +152,10 @@ again:
return ERR_PTR(-ENOMEM);
btrfs_init_delayed_node(node, root, ino);
- /* Cached in the inode and can be accessed. */
- refcount_set(&node->refs, 2);
-
/* Allocate and reserve the slot, from now it can return a NULL from xa_load(). */
ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS);
if (ret == -ENOMEM) {
+ btrfs_delayed_node_ref_tracker_dir_exit(node);
kmem_cache_free(delayed_node_cache, node);
return ERR_PTR(-ENOMEM);
}
@@ -158,6 +164,7 @@ again:
if (ptr) {
/* Somebody inserted it, go back and read it. */
xa_unlock(&root->delayed_nodes);
+ btrfs_delayed_node_ref_tracker_dir_exit(node);
kmem_cache_free(delayed_node_cache, node);
node = NULL;
goto again;
@@ -166,6 +173,12 @@ again:
ASSERT(xa_err(ptr) != -EINVAL);
ASSERT(xa_err(ptr) != -ENOMEM);
ASSERT(ptr == NULL);
+
+ /* Cached in the inode and can be accessed. */
+ refcount_set(&node->refs, 2);
+ btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
+ btrfs_delayed_node_ref_tracker_alloc(node, &node->inode_cache_tracker, GFP_ATOMIC);
+
btrfs_inode->delayed_node = node;
xa_unlock(&root->delayed_nodes);
@@ -191,6 +204,8 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
list_add_tail(&node->n_list, &root->node_list);
list_add_tail(&node->p_list, &root->prepare_list);
refcount_inc(&node->refs); /* inserted into list */
+ btrfs_delayed_node_ref_tracker_alloc(node, &node->node_list_tracker,
+ GFP_ATOMIC);
root->nodes++;
set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
}
@@ -204,6 +219,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
spin_lock(&root->lock);
if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
root->nodes--;
+ btrfs_delayed_node_ref_tracker_free(node, &node->node_list_tracker);
refcount_dec(&node->refs); /* not in the list */
list_del_init(&node->n_list);
if (!list_empty(&node->p_list))
@@ -214,22 +230,26 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
}
static struct btrfs_delayed_node *btrfs_first_delayed_node(
- struct btrfs_delayed_root *delayed_root)
+ struct btrfs_delayed_root *delayed_root,
+ struct btrfs_ref_tracker *tracker)
{
struct btrfs_delayed_node *node;
spin_lock(&delayed_root->lock);
node = list_first_entry_or_null(&delayed_root->node_list,
struct btrfs_delayed_node, n_list);
- if (node)
+ if (node) {
refcount_inc(&node->refs);
+ btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
+ }
spin_unlock(&delayed_root->lock);
return node;
}
static struct btrfs_delayed_node *btrfs_next_delayed_node(
- struct btrfs_delayed_node *node)
+ struct btrfs_delayed_node *node,
+ struct btrfs_ref_tracker *tracker)
{
struct btrfs_delayed_root *delayed_root;
struct list_head *p;
@@ -249,6 +269,7 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(
next = list_entry(p, struct btrfs_delayed_node, n_list);
refcount_inc(&next->refs);
+ btrfs_delayed_node_ref_tracker_alloc(next, tracker, GFP_ATOMIC);
out:
spin_unlock(&delayed_root->lock);
@@ -257,7 +278,7 @@ out:
static void __btrfs_release_delayed_node(
struct btrfs_delayed_node *delayed_node,
- int mod)
+ int mod, struct btrfs_ref_tracker *tracker)
{
struct btrfs_delayed_root *delayed_root;
@@ -273,6 +294,7 @@ static void __btrfs_release_delayed_node(
btrfs_dequeue_delayed_node(delayed_root, delayed_node);
mutex_unlock(&delayed_node->mutex);
+ btrfs_delayed_node_ref_tracker_free(delayed_node, tracker);
if (refcount_dec_and_test(&delayed_node->refs)) {
struct btrfs_root *root = delayed_node->root;
@@ -282,17 +304,20 @@ static void __btrfs_release_delayed_node(
* back up. We can delete it now.
*/
ASSERT(refcount_read(&delayed_node->refs) == 0);
+ btrfs_delayed_node_ref_tracker_dir_exit(delayed_node);
kmem_cache_free(delayed_node_cache, delayed_node);
}
}
-static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
+static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node,
+ struct btrfs_ref_tracker *tracker)
{
- __btrfs_release_delayed_node(node, 0);
+ __btrfs_release_delayed_node(node, 0, tracker);
}
static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
- struct btrfs_delayed_root *delayed_root)
+ struct btrfs_delayed_root *delayed_root,
+ struct btrfs_ref_tracker *tracker)
{
struct btrfs_delayed_node *node;
@@ -302,6 +327,7 @@ static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
if (node) {
list_del_init(&node->p_list);
refcount_inc(&node->refs);
+ btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
}
spin_unlock(&delayed_root->lock);
@@ -309,9 +335,10 @@ static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
}
static inline void btrfs_release_prepared_delayed_node(
- struct btrfs_delayed_node *node)
+ struct btrfs_delayed_node *node,
+ struct btrfs_ref_tracker *tracker)
{
- __btrfs_release_delayed_node(node, 1);
+ __btrfs_release_delayed_node(node, 1, tracker);
}
static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len,
@@ -711,8 +738,8 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
u32 *ins_sizes;
int i = 0;
- ins_data = kmalloc(batch.nr * sizeof(u32) +
- batch.nr * sizeof(struct btrfs_key), GFP_NOFS);
+ ins_data = kmalloc_array(batch.nr,
+ sizeof(u32) + sizeof(struct btrfs_key), GFP_NOFS);
if (!ins_data) {
ret = -ENOMEM;
goto out;
@@ -1011,7 +1038,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
* transaction, because we could leave the inode with the
* improper counts behind.
*/
- if (ret != -ENOENT)
+ if (unlikely(ret != -ENOENT))
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1039,7 +1066,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto err_out;
}
@@ -1126,6 +1153,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
+ struct btrfs_ref_tracker curr_delayed_node_tracker, prev_delayed_node_tracker;
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret = 0;
@@ -1143,17 +1171,18 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
delayed_root = fs_info->delayed_root;
- curr_node = btrfs_first_delayed_node(delayed_root);
+ curr_node = btrfs_first_delayed_node(delayed_root, &curr_delayed_node_tracker);
while (curr_node && (!count || nr--)) {
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
prev_node = curr_node;
- curr_node = btrfs_next_delayed_node(curr_node);
+ prev_delayed_node_tracker = curr_delayed_node_tracker;
+ curr_node = btrfs_next_delayed_node(curr_node, &curr_delayed_node_tracker);
/*
* See the comment below about releasing path before releasing
* node. If the commit of delayed items was successful the path
@@ -1161,7 +1190,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
* point to locked extent buffers (a leaf at the very least).
*/
ASSERT(path->nodes[0] == NULL);
- btrfs_release_delayed_node(prev_node);
+ btrfs_release_delayed_node(prev_node, &prev_delayed_node_tracker);
}
/*
@@ -1174,7 +1203,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
btrfs_free_path(path);
if (curr_node)
- btrfs_release_delayed_node(curr_node);
+ btrfs_release_delayed_node(curr_node, &curr_delayed_node_tracker);
trans->block_rsv = block_rsv;
return ret;
@@ -1193,7 +1222,9 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
- struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
+ struct btrfs_ref_tracker delayed_node_tracker;
+ struct btrfs_delayed_node *delayed_node =
+ btrfs_get_delayed_node(inode, &delayed_node_tracker);
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_block_rsv *block_rsv;
int ret;
@@ -1204,14 +1235,14 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
mutex_lock(&delayed_node->mutex);
if (!delayed_node->count) {
mutex_unlock(&delayed_node->mutex);
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
}
mutex_unlock(&delayed_node->mutex);
path = btrfs_alloc_path();
if (!path) {
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return -ENOMEM;
}
@@ -1220,7 +1251,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
trans->block_rsv = block_rsv;
return ret;
@@ -1230,18 +1261,20 @@ int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_trans_handle *trans;
- struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
+ struct btrfs_ref_tracker delayed_node_tracker;
+ struct btrfs_delayed_node *delayed_node;
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret;
+ delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!delayed_node)
return 0;
mutex_lock(&delayed_node->mutex);
if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
mutex_unlock(&delayed_node->mutex);
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
}
mutex_unlock(&delayed_node->mutex);
@@ -1275,7 +1308,7 @@ trans_out:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
out:
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return ret;
}
@@ -1289,7 +1322,8 @@ void btrfs_remove_delayed_node(struct btrfs_inode *inode)
return;
inode->delayed_node = NULL;
- btrfs_release_delayed_node(delayed_node);
+
+ btrfs_release_delayed_node(delayed_node, &delayed_node->inode_cache_tracker);
}
struct btrfs_async_delayed_work {
@@ -1305,6 +1339,7 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
struct btrfs_trans_handle *trans;
struct btrfs_path *path;
struct btrfs_delayed_node *delayed_node = NULL;
+ struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
int total_done = 0;
@@ -1321,7 +1356,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
BTRFS_DELAYED_BACKGROUND / 2)
break;
- delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
+ delayed_node = btrfs_first_prepared_delayed_node(delayed_root,
+ &delayed_node_tracker);
if (!delayed_node)
break;
@@ -1330,7 +1366,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
btrfs_release_path(path);
- btrfs_release_prepared_delayed_node(delayed_node);
+ btrfs_release_prepared_delayed_node(delayed_node,
+ &delayed_node_tracker);
total_done++;
continue;
}
@@ -1345,7 +1382,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
btrfs_btree_balance_dirty_nodelay(root->fs_info);
btrfs_release_path(path);
- btrfs_release_prepared_delayed_node(delayed_node);
+ btrfs_release_prepared_delayed_node(delayed_node,
+ &delayed_node_tracker);
total_done++;
} while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
@@ -1377,10 +1415,15 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
{
- struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root);
+ struct btrfs_ref_tracker delayed_node_tracker;
+ struct btrfs_delayed_node *node;
- if (WARN_ON(node))
+ node = btrfs_first_delayed_node(fs_info->delayed_root, &delayed_node_tracker);
+ if (WARN_ON(node)) {
+ btrfs_delayed_node_ref_tracker_free(node,
+ &delayed_node_tracker);
refcount_dec(&node->refs);
+ }
}
static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
@@ -1454,13 +1497,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = trans->fs_info;
const unsigned int leaf_data_size = BTRFS_LEAF_DATA_SIZE(fs_info);
struct btrfs_delayed_node *delayed_node;
+ struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_delayed_item *delayed_item;
struct btrfs_dir_item *dir_item;
bool reserve_leaf_space;
u32 data_len;
int ret;
- delayed_node = btrfs_get_or_create_delayed_node(dir);
+ delayed_node = btrfs_get_or_create_delayed_node(dir, &delayed_node_tracker);
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
@@ -1536,7 +1580,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_unlock(&delayed_node->mutex);
release_node:
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return ret;
}
@@ -1591,10 +1635,11 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir, u64 index)
{
struct btrfs_delayed_node *node;
+ struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_delayed_item *item;
int ret;
- node = btrfs_get_or_create_delayed_node(dir);
+ node = btrfs_get_or_create_delayed_node(dir, &delayed_node_tracker);
if (IS_ERR(node))
return PTR_ERR(node);
@@ -1635,14 +1680,16 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
}
mutex_unlock(&node->mutex);
end:
- btrfs_release_delayed_node(node);
+ btrfs_release_delayed_node(node, &delayed_node_tracker);
return ret;
}
int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
{
- struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
+ struct btrfs_ref_tracker delayed_node_tracker;
+ struct btrfs_delayed_node *delayed_node;
+ delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!delayed_node)
return -ENOENT;
@@ -1652,12 +1699,12 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
* is updated now. So we needn't lock the delayed node.
*/
if (!delayed_node->index_cnt) {
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return -EINVAL;
}
inode->index_cnt = delayed_node->index_cnt;
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
}
@@ -1668,8 +1715,9 @@ bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode,
{
struct btrfs_delayed_node *delayed_node;
struct btrfs_delayed_item *item;
+ struct btrfs_ref_tracker delayed_node_tracker;
- delayed_node = btrfs_get_delayed_node(inode);
+ delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!delayed_node)
return false;
@@ -1704,6 +1752,7 @@ bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode,
* insert/delete delayed items in this period. So we also needn't
* requeue or dequeue this delayed node.
*/
+ btrfs_delayed_node_ref_tracker_free(delayed_node, &delayed_node_tracker);
refcount_dec(&delayed_node->refs);
return true;
@@ -1843,19 +1892,19 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_delayed_node *delayed_node;
+ struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_inode_item *inode_item;
struct inode *vfs_inode = &inode->vfs_inode;
- delayed_node = btrfs_get_delayed_node(inode);
+ delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!delayed_node)
return -ENOENT;
mutex_lock(&delayed_node->mutex);
if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
mutex_unlock(&delayed_node->mutex);
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return -ENOENT;
}
@@ -1864,8 +1913,6 @@ int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
i_uid_write(vfs_inode, btrfs_stack_inode_uid(inode_item));
i_gid_write(vfs_inode, btrfs_stack_inode_gid(inode_item));
btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
- btrfs_inode_set_file_extent_range(inode, 0,
- round_up(i_size_read(vfs_inode), fs_info->sectorsize));
vfs_inode->i_mode = btrfs_stack_inode_mode(inode_item);
set_nlink(vfs_inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(vfs_inode, btrfs_stack_inode_nbytes(inode_item));
@@ -1895,7 +1942,7 @@ int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
inode->index_cnt = (u64)-1;
mutex_unlock(&delayed_node->mutex);
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
}
@@ -1904,9 +1951,10 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = inode->root;
struct btrfs_delayed_node *delayed_node;
+ struct btrfs_ref_tracker delayed_node_tracker;
int ret = 0;
- delayed_node = btrfs_get_or_create_delayed_node(inode);
+ delayed_node = btrfs_get_or_create_delayed_node(inode, &delayed_node_tracker);
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
@@ -1926,7 +1974,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
atomic_inc(&root->fs_info->delayed_root->items);
release_node:
mutex_unlock(&delayed_node->mutex);
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return ret;
}
@@ -1934,6 +1982,7 @@ int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_delayed_node *delayed_node;
+ struct btrfs_ref_tracker delayed_node_tracker;
/*
* we don't do delayed inode updates during log recovery because it
@@ -1943,7 +1992,7 @@ int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
return -EAGAIN;
- delayed_node = btrfs_get_or_create_delayed_node(inode);
+ delayed_node = btrfs_get_or_create_delayed_node(inode, &delayed_node_tracker);
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
@@ -1970,7 +2019,7 @@ int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
atomic_inc(&fs_info->delayed_root->items);
release_node:
mutex_unlock(&delayed_node->mutex);
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
}
@@ -2014,19 +2063,21 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node;
+ struct btrfs_ref_tracker delayed_node_tracker;
- delayed_node = btrfs_get_delayed_node(inode);
+ delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!delayed_node)
return;
__btrfs_kill_delayed_node(delayed_node);
- btrfs_release_delayed_node(delayed_node);
+ btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
}
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
{
unsigned long index = 0;
struct btrfs_delayed_node *delayed_nodes[8];
+ struct btrfs_ref_tracker delayed_node_trackers[8];
while (1) {
struct btrfs_delayed_node *node;
@@ -2045,6 +2096,9 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
* about to be removed from the tree in the loop below
*/
if (refcount_inc_not_zero(&node->refs)) {
+ btrfs_delayed_node_ref_tracker_alloc(node,
+ &delayed_node_trackers[count],
+ GFP_ATOMIC);
delayed_nodes[count] = node;
count++;
}
@@ -2056,7 +2110,9 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
for (int i = 0; i < count; i++) {
__btrfs_kill_delayed_node(delayed_nodes[i]);
- btrfs_release_delayed_node(delayed_nodes[i]);
+ /* Print first: the release below may drop the last ref and free the node. */
+ btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]);
+ btrfs_release_delayed_node(delayed_nodes[i], &delayed_node_trackers[i]);
}
}
}
@@ -2064,14 +2120,17 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
{
struct btrfs_delayed_node *curr_node, *prev_node;
+ struct btrfs_ref_tracker curr_delayed_node_tracker, prev_delayed_node_tracker;
- curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
+ curr_node = btrfs_first_delayed_node(fs_info->delayed_root,
+ &curr_delayed_node_tracker);
while (curr_node) {
__btrfs_kill_delayed_node(curr_node);
prev_node = curr_node;
- curr_node = btrfs_next_delayed_node(curr_node);
- btrfs_release_delayed_node(prev_node);
+ prev_delayed_node_tracker = curr_delayed_node_tracker;
+ curr_node = btrfs_next_delayed_node(curr_node, &curr_delayed_node_tracker);
+ btrfs_release_delayed_node(prev_node, &prev_delayed_node_tracker);
}
}
@@ -2081,8 +2140,9 @@ void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
{
struct btrfs_delayed_node *node;
struct btrfs_delayed_item *item;
+ struct btrfs_ref_tracker delayed_node_tracker;
- node = btrfs_get_delayed_node(inode);
+ node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!node)
return;
@@ -2140,6 +2200,7 @@ void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
* delete delayed items.
*/
ASSERT(refcount_read(&node->refs) > 1);
+ btrfs_delayed_node_ref_tracker_free(node, &delayed_node_tracker);
refcount_dec(&node->refs);
}
@@ -2150,8 +2211,9 @@ void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
struct btrfs_delayed_node *node;
struct btrfs_delayed_item *item;
struct btrfs_delayed_item *next;
+ struct btrfs_ref_tracker delayed_node_tracker;
- node = btrfs_get_delayed_node(inode);
+ node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!node)
return;
@@ -2183,5 +2245,6 @@ void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
* delete delayed items.
*/
ASSERT(refcount_read(&node->refs) > 1);
+ btrfs_delayed_node_ref_tracker_free(node, &delayed_node_tracker);
refcount_dec(&node->refs);
}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index e6e763ad2d42..0d949edc0caf 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -16,6 +16,7 @@
#include <linux/fs.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
+#include <linux/ref_tracker.h>
#include "ctree.h"
struct btrfs_disk_key;
@@ -44,6 +45,22 @@ struct btrfs_delayed_root {
wait_queue_head_t wait;
};
+struct btrfs_ref_tracker_dir {
+#ifdef CONFIG_BTRFS_DEBUG
+ struct ref_tracker_dir dir;
+#else
+ struct {} tracker;
+#endif
+};
+
+struct btrfs_ref_tracker {
+#ifdef CONFIG_BTRFS_DEBUG
+ struct ref_tracker *tracker;
+#else
+ struct {} tracker;
+#endif
+};
+
#define BTRFS_DELAYED_NODE_IN_LIST 0
#define BTRFS_DELAYED_NODE_INODE_DIRTY 1
#define BTRFS_DELAYED_NODE_DEL_IREF 2
@@ -78,6 +95,12 @@ struct btrfs_delayed_node {
* actual number of leaves we end up using. Protected by @mutex.
*/
u32 index_item_leaves;
+ /* Track all references to this delayed node. */
+ struct btrfs_ref_tracker_dir ref_dir;
+ /* Track delayed node reference stored in node list. */
+ struct btrfs_ref_tracker node_list_tracker;
+ /* Track delayed node reference stored in inode cache. */
+ struct btrfs_ref_tracker inode_cache_tracker;
};
struct btrfs_delayed_item {
@@ -169,4 +192,74 @@ void __cold btrfs_delayed_inode_exit(void);
/* for debugging */
void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info);
+#define BTRFS_DELAYED_NODE_REF_TRACKER_QUARANTINE_COUNT 16
+#define BTRFS_DELAYED_NODE_REF_TRACKER_DISPLAY_LIMIT 16
+
+#ifdef CONFIG_BTRFS_DEBUG
+static inline void btrfs_delayed_node_ref_tracker_dir_init(struct btrfs_delayed_node *node)
+{
+ if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
+ return;
+
+ ref_tracker_dir_init(&node->ref_dir.dir,
+ BTRFS_DELAYED_NODE_REF_TRACKER_QUARANTINE_COUNT,
+ "delayed_node");
+}
+
+static inline void btrfs_delayed_node_ref_tracker_dir_exit(struct btrfs_delayed_node *node)
+{
+ if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
+ return;
+
+ ref_tracker_dir_exit(&node->ref_dir.dir);
+}
+
+static inline void btrfs_delayed_node_ref_tracker_dir_print(struct btrfs_delayed_node *node)
+{
+ if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
+ return;
+
+ ref_tracker_dir_print(&node->ref_dir.dir,
+ BTRFS_DELAYED_NODE_REF_TRACKER_DISPLAY_LIMIT);
+}
+
+static inline int btrfs_delayed_node_ref_tracker_alloc(struct btrfs_delayed_node *node,
+ struct btrfs_ref_tracker *tracker,
+ gfp_t gfp)
+{
+ if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
+ return 0;
+
+ return ref_tracker_alloc(&node->ref_dir.dir, &tracker->tracker, gfp);
+}
+
+static inline int btrfs_delayed_node_ref_tracker_free(struct btrfs_delayed_node *node,
+ struct btrfs_ref_tracker *tracker)
+{
+ if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
+ return 0;
+
+ return ref_tracker_free(&node->ref_dir.dir, &tracker->tracker);
+}
+#else
+static inline void btrfs_delayed_node_ref_tracker_dir_init(struct btrfs_delayed_node *node) { }
+
+static inline void btrfs_delayed_node_ref_tracker_dir_exit(struct btrfs_delayed_node *node) { }
+
+static inline void btrfs_delayed_node_ref_tracker_dir_print(struct btrfs_delayed_node *node) { }
+
+static inline int btrfs_delayed_node_ref_tracker_alloc(struct btrfs_delayed_node *node,
+ struct btrfs_ref_tracker *tracker,
+ gfp_t gfp)
+{
+ return 0;
+}
+
+static inline int btrfs_delayed_node_ref_tracker_free(struct btrfs_delayed_node *node,
+ struct btrfs_ref_tracker *tracker)
+{
+ return 0;
+}
+#endif
+
#endif
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index ca382c5b186f..481802efaa14 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -895,7 +895,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
}
/*
- * Initialize the structure which represents a modification to a an extent.
+ * Initialize the structure which represents a modification to an extent.
*
* @fs_info: Internal to the mounted filesystem mount structure.
*
@@ -952,7 +952,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root,
bool skip_qgroup)
{
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+#ifdef CONFIG_BTRFS_DEBUG
/* If @real_root not set, use @root as fallback */
generic_ref->real_root = mod_root ?: generic_ref->ref_root;
#endif
@@ -969,7 +969,7 @@ void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root,
void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset,
u64 mod_root, bool skip_qgroup)
{
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+#ifdef CONFIG_BTRFS_DEBUG
/* If @real_root not set, use @root as fallback */
generic_ref->real_root = mod_root ?: generic_ref->ref_root;
#endif
@@ -1251,7 +1251,6 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
{
struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
struct btrfs_fs_info *fs_info = trans->fs_info;
- bool testing = btrfs_is_testing(fs_info);
spin_lock(&delayed_refs->lock);
while (true) {
@@ -1281,7 +1280,7 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
spin_unlock(&delayed_refs->lock);
mutex_unlock(&head->mutex);
- if (!testing && pin_bytes) {
+ if (!btrfs_is_testing(fs_info) && pin_bytes) {
struct btrfs_block_group *bg;
bg = btrfs_lookup_block_group(fs_info, head->bytenr);
@@ -1312,14 +1311,14 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
btrfs_error_unpin_extent_range(fs_info, head->bytenr,
head->bytenr + head->num_bytes - 1);
}
- if (!testing)
+ if (!btrfs_is_testing(fs_info))
btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
btrfs_put_delayed_ref_head(head);
cond_resched();
spin_lock(&delayed_refs->lock);
}
- if (!testing)
+ if (!btrfs_is_testing(fs_info))
btrfs_qgroup_destroy_extent_records(trans);
spin_unlock(&delayed_refs->lock);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 552ec4fa645d..5ce940532144 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -276,10 +276,6 @@ struct btrfs_ref {
*/
bool skip_qgroup;
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
- /* Through which root is this modification. */
- u64 real_root;
-#endif
u64 bytenr;
u64 num_bytes;
u64 owning_root;
@@ -296,6 +292,11 @@ struct btrfs_ref {
struct btrfs_data_ref data_ref;
struct btrfs_tree_ref tree_ref;
};
+
+#ifdef CONFIG_BTRFS_DEBUG
+ /* Through which root is this modification. */
+ u64 real_root;
+#endif
};
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 4675bcd5f92e..a4eaef60549e 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -98,7 +98,7 @@ no_valid_dev_replace_entry_found:
* We don't have a replace item or it's corrupted. If there is
* a replace target, fail the mount.
*/
- if (btrfs_find_device(fs_info->fs_devices, &args)) {
+ if (unlikely(btrfs_find_device(fs_info->fs_devices, &args))) {
btrfs_err(fs_info,
"found replace target device without a valid replace item");
return -EUCLEAN;
@@ -158,7 +158,7 @@ no_valid_dev_replace_entry_found:
* We don't have an active replace item but if there is a
* replace target, fail the mount.
*/
- if (btrfs_find_device(fs_info->fs_devices, &args)) {
+ if (unlikely(btrfs_find_device(fs_info->fs_devices, &args))) {
btrfs_err(fs_info,
"replace without active item, run 'device scan --forget' on the target device");
ret = -EUCLEAN;
@@ -177,8 +177,7 @@ no_valid_dev_replace_entry_found:
* allow 'btrfs dev replace_cancel' if src/tgt device is
* missing
*/
- if (!dev_replace->srcdev &&
- !btrfs_test_opt(fs_info, DEGRADED)) {
+ if (unlikely(!dev_replace->srcdev && !btrfs_test_opt(fs_info, DEGRADED))) {
ret = -EIO;
btrfs_warn(fs_info,
"cannot mount because device replace operation is ongoing and");
@@ -186,8 +185,7 @@ no_valid_dev_replace_entry_found:
"srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
src_devid);
}
- if (!dev_replace->tgtdev &&
- !btrfs_test_opt(fs_info, DEGRADED)) {
+ if (unlikely(!dev_replace->tgtdev && !btrfs_test_opt(fs_info, DEGRADED))) {
ret = -EIO;
btrfs_warn(fs_info,
"cannot mount because device replace operation is ongoing and");
@@ -637,7 +635,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
- DEBUG_WARN("unexpected STARTED ot SUSPENDED dev-replace state");
+ DEBUG_WARN("unexpected STARTED or SUSPENDED dev-replace state");
ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
up_write(&dev_replace->rwsem);
goto leave;
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index fe9a4bd7e6e6..802d4dbe5b38 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -786,6 +786,18 @@ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
if (iov_iter_alignment(iter) & blocksize_mask)
return -EINVAL;
+ /*
+ * For bs > ps support, we heavily rely on large folios to make sure no
+ * block will cross large folio boundaries.
+ *
+ * But memory provided by direct IO is only virtually contiguous, not
+ * physically contiguous, and will break btrfs' large folio requirement.
+ *
+ * So for bs > ps support, all direct IOs should fall back to buffered ones.
+ */
+ if (fs_info->sectorsize > PAGE_SIZE)
+ return -EINVAL;
+
return 0;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 70fc4e7cc5a0..9247a58894de 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -116,7 +116,7 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
* detect blocks that either didn't get written at all or got written
* in the wrong place.
*/
-int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic)
+int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic)
{
if (!extent_buffer_uptodate(eb))
return 0;
@@ -370,21 +370,21 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
ASSERT(check);
found_start = btrfs_header_bytenr(eb);
- if (found_start != eb->start) {
+ if (unlikely(found_start != eb->start)) {
btrfs_err_rl(fs_info,
"bad tree block start, mirror %u want %llu have %llu",
eb->read_mirror, eb->start, found_start);
ret = -EIO;
goto out;
}
- if (check_tree_block_fsid(eb)) {
+ if (unlikely(check_tree_block_fsid(eb))) {
btrfs_err_rl(fs_info, "bad fsid on logical %llu mirror %u",
eb->start, eb->read_mirror);
ret = -EIO;
goto out;
}
found_level = btrfs_header_level(eb);
- if (found_level >= BTRFS_MAX_LEVEL) {
+ if (unlikely(found_level >= BTRFS_MAX_LEVEL)) {
btrfs_err(fs_info,
"bad tree block level, mirror %u level %d on logical %llu",
eb->read_mirror, btrfs_header_level(eb), eb->start);
@@ -404,13 +404,13 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
CSUM_FMT_VALUE(csum_size, result),
btrfs_header_level(eb),
ignore_csum ? ", ignored" : "");
- if (!ignore_csum) {
+ if (unlikely(!ignore_csum)) {
ret = -EUCLEAN;
goto out;
}
}
- if (found_level != check->level) {
+ if (unlikely(found_level != check->level)) {
btrfs_err(fs_info,
"level verify failed on logical %llu mirror %u wanted %u found %u",
eb->start, eb->read_mirror, check->level, found_level);
@@ -639,7 +639,6 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
u64 objectid, gfp_t flags)
{
struct btrfs_root *root;
- bool dummy = btrfs_is_testing(fs_info);
root = kzalloc(sizeof(*root), flags);
if (!root)
@@ -696,7 +695,7 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
root->log_transid_committed = -1;
btrfs_set_root_last_log_commit(root, 0);
root->anon_dev = 0;
- if (!dummy) {
+ if (!btrfs_is_testing(fs_info)) {
btrfs_extent_io_tree_init(fs_info, &root->dirty_log_pages,
IO_TREE_ROOT_DIRTY_LOG_PAGES);
btrfs_extent_io_tree_init(fs_info, &root->log_csum_range,
@@ -1047,7 +1046,7 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
root->node = NULL;
goto fail;
}
- if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
+ if (unlikely(!btrfs_buffer_uptodate(root->node, generation, false))) {
ret = -EIO;
goto fail;
}
@@ -1056,10 +1055,10 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
* For real fs, and not log/reloc trees, root owner must
* match its root node owner
*/
- if (!btrfs_is_testing(fs_info) &&
- btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
- btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
- btrfs_root_id(root) != btrfs_header_owner(root->node)) {
+ if (unlikely(!btrfs_is_testing(fs_info) &&
+ btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
+ btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
+ btrfs_root_id(root) != btrfs_header_owner(root->node))) {
btrfs_crit(fs_info,
"root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu",
btrfs_root_id(root), root->node->start,
@@ -1248,6 +1247,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
if (fs_info->fs_devices)
btrfs_close_devices(fs_info->fs_devices);
+ btrfs_free_compress_wsm(fs_info);
percpu_counter_destroy(&fs_info->stats_read_blocks);
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
@@ -1958,7 +1958,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
{
u32 max_active = fs_info->thread_pool_size;
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
- unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE;
+ unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU;
fs_info->workers =
btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16);
@@ -2058,7 +2058,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
u64 bytenr = btrfs_super_log_root(disk_super);
int level = btrfs_super_log_root_level(disk_super);
- if (fs_devices->rw_devices == 0) {
+ if (unlikely(fs_devices->rw_devices == 0)) {
btrfs_warn(fs_info, "log replay required on RO media");
return -EIO;
}
@@ -2079,7 +2079,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
btrfs_put_root(log_tree_root);
return ret;
}
- if (!extent_buffer_uptodate(log_tree_root->node)) {
+ if (unlikely(!extent_buffer_uptodate(log_tree_root->node))) {
btrfs_err(fs_info, "failed to read log tree");
btrfs_put_root(log_tree_root);
return -EIO;
@@ -2087,10 +2087,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
+ btrfs_put_root(log_tree_root);
if (ret) {
btrfs_handle_fs_error(fs_info, ret,
"Failed to recover log tree");
- btrfs_put_root(log_tree_root);
return ret;
}
@@ -2324,7 +2324,7 @@ static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info,
const u32 sectorsize = btrfs_super_sectorsize(sb);
u32 sys_array_size = btrfs_super_sys_array_size(sb);
- if (sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+ if (unlikely(sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)) {
btrfs_err(fs_info, "system chunk array too big %u > %u",
sys_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
return -EUCLEAN;
@@ -2342,12 +2342,12 @@ static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info,
disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur);
len = sizeof(*disk_key);
- if (cur + len > sys_array_size)
+ if (unlikely(cur + len > sys_array_size))
goto short_read;
cur += len;
btrfs_disk_key_to_cpu(&key, disk_key);
- if (key.type != BTRFS_CHUNK_ITEM_KEY) {
+ if (unlikely(key.type != BTRFS_CHUNK_ITEM_KEY)) {
btrfs_err(fs_info,
"unexpected item type %u in sys_array at offset %u",
key.type, cur);
@@ -2355,10 +2355,10 @@ static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info,
}
chunk = (struct btrfs_chunk *)(sb->sys_chunk_array + cur);
num_stripes = btrfs_stack_chunk_num_stripes(chunk);
- if (cur + btrfs_chunk_item_size(num_stripes) > sys_array_size)
+ if (unlikely(cur + btrfs_chunk_item_size(num_stripes) > sys_array_size))
goto short_read;
type = btrfs_stack_chunk_type(chunk);
- if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
+ if (unlikely(!(type & BTRFS_BLOCK_GROUP_SYSTEM))) {
btrfs_err(fs_info,
"invalid chunk type %llu in sys_array at offset %u",
type, cur);
@@ -2438,21 +2438,7 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
- /*
- * We only support at most 3 sectorsizes: 4K, PAGE_SIZE, MIN_BLOCKSIZE.
- *
- * For 4K page sized systems with non-debug builds, all 3 matches (4K).
- * For 4K page sized systems with debug builds, there are two block sizes
- * supported. (4K and 2K)
- *
- * We can support 16K sectorsize with 64K page size without problem,
- * but such sectorsize/pagesize combination doesn't make much sense.
- * 4K will be our future standard, PAGE_SIZE is supported from the very
- * beginning.
- */
- if (sectorsize > PAGE_SIZE || (sectorsize != SZ_4K &&
- sectorsize != PAGE_SIZE &&
- sectorsize != BTRFS_MIN_BLOCKSIZE)) {
+ if (!btrfs_supported_blocksize(sectorsize)) {
btrfs_err(fs_info,
"sectorsize %llu not yet supported for page size %lu",
sectorsize, PAGE_SIZE);
@@ -2619,13 +2605,13 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
ret = btrfs_validate_super(fs_info, sb, -1);
if (ret < 0)
goto out;
- if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
+ if (unlikely(!btrfs_supported_super_csum(btrfs_super_csum_type(sb)))) {
ret = -EUCLEAN;
btrfs_err(fs_info, "invalid csum type, has %u want %u",
btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
goto out;
}
- if (btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
+ if (unlikely(btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP)) {
ret = -EUCLEAN;
btrfs_err(fs_info,
"invalid incompat flags, has 0x%llx valid mask 0x%llx",
@@ -2655,7 +2641,7 @@ static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int lev
root->node = NULL;
return ret;
}
- if (!extent_buffer_uptodate(root->node)) {
+ if (unlikely(!extent_buffer_uptodate(root->node))) {
free_extent_buffer(root->node);
root->node = NULL;
return -EIO;
@@ -3256,18 +3242,24 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
}
/*
- * Subpage runtime limitation on v1 cache.
+ * Subpage/bs > ps runtime limitation on v1 cache.
*
- * V1 space cache still has some hard codeed PAGE_SIZE usage, while
+ * V1 space cache still has some hard coded PAGE_SIZE usage, while
* we're already defaulting to v2 cache, no need to bother v1 as it's
* going to be deprecated anyway.
*/
- if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
+ if (fs_info->sectorsize != PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
btrfs_warn(fs_info,
"v1 space cache is not supported for page size %lu with sectorsize %u",
PAGE_SIZE, fs_info->sectorsize);
return -EINVAL;
}
+ if (fs_info->sectorsize > PAGE_SIZE && btrfs_fs_incompat(fs_info, RAID56)) {
+ btrfs_err(fs_info,
+ "RAID56 is not supported for page size %lu with sectorsize %u",
+ PAGE_SIZE, fs_info->sectorsize);
+ return -EINVAL;
+ }
/* This can be called by remount, we need to protect the super block. */
spin_lock(&fs_info->super_lock);
@@ -3396,10 +3388,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->nodesize_bits = ilog2(nodesize);
fs_info->sectorsize = sectorsize;
fs_info->sectorsize_bits = ilog2(sectorsize);
+ fs_info->block_min_order = ilog2(round_up(sectorsize, PAGE_SIZE) >> PAGE_SHIFT);
+ fs_info->block_max_order = ilog2((BITS_PER_LONG << fs_info->sectorsize_bits) >> PAGE_SHIFT);
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
fs_info->stripesize = stripesize;
fs_info->fs_devices->fs_info = fs_info;
+ if (fs_info->sectorsize > PAGE_SIZE)
+ btrfs_warn(fs_info,
+ "support for block size %u with page size %zu is experimental, some features may be missing",
+ fs_info->sectorsize, PAGE_SIZE);
/*
* Handle the space caching options appropriately now that we have the
* super block loaded and validated.
@@ -3421,6 +3419,9 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
fs_info->max_inline = min_t(u64, fs_info->max_inline, fs_info->sectorsize);
+ ret = btrfs_alloc_compress_wsm(fs_info);
+ if (ret)
+ goto fail_sb_buffer;
ret = btrfs_init_workqueues(fs_info);
if (ret)
goto fail_sb_buffer;
@@ -3468,7 +3469,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* below in btrfs_init_dev_replace().
*/
btrfs_free_extra_devids(fs_devices);
- if (!fs_devices->latest_dev->bdev) {
+ if (unlikely(!fs_devices->latest_dev->bdev)) {
btrfs_err(fs_info, "failed to read devices");
ret = -EIO;
goto fail_tree_roots;
@@ -3962,7 +3963,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
* Checks last_flush_error of disks in order to determine the device
* state.
*/
- if (errors_wait && !btrfs_check_rw_degradable(info, NULL))
+ if (unlikely(errors_wait && !btrfs_check_rw_degradable(info, NULL)))
return -EIO;
return 0;
@@ -4064,7 +4065,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
ret = btrfs_validate_write_super(fs_info, sb);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_handle_fs_error(fs_info, -EUCLEAN,
"unexpected superblock corruption detected");
@@ -4075,7 +4076,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
if (ret)
total_errors++;
}
- if (total_errors > max_errors) {
+ if (unlikely(total_errors > max_errors)) {
btrfs_err(fs_info, "%d errors while writing supers",
total_errors);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@@ -4100,7 +4101,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
total_errors++;
}
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- if (total_errors > max_errors) {
+ if (unlikely(total_errors > max_errors)) {
btrfs_handle_fs_error(fs_info, -EIO,
"%d errors while writing supers",
total_errors);
@@ -4880,7 +4881,7 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root)
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret < 0)
return ret;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist a root
* with such id, but this is out of valid range.
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 864a55a96226..57920f2c6fe4 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -106,8 +106,7 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
void btrfs_put_root(struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans,
struct extent_buffer *buf);
-int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
- int atomic);
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, bool atomic);
int btrfs_read_extent_buffer(struct extent_buffer *buf,
const struct btrfs_tree_parent_check *check);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 7fc8a3200b40..d062ac521051 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -174,7 +174,7 @@ struct dentry *btrfs_get_parent(struct dentry *child)
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto fail;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/*
* Key with offset of -1 found, there would have to exist an
* inode with such number or a root with such id.
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 66361325f6dc..bb2ca1c9c7b0 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -1237,7 +1237,7 @@ hit_next:
state = next_search_state(inserted_state, end);
/*
* If there's a next state, whether contiguous or not, we don't
- * need to unlock and start search agian. If it's not contiguous
+ * need to unlock and start search again. If it's not contiguous
* we will end up here and try to allocate a prealloc state and insert.
*/
if (state)
@@ -1664,7 +1664,7 @@ out:
*/
u64 btrfs_count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
- u32 bits, int contig,
+ u32 bits, bool contig,
struct extent_state **cached_state)
{
struct extent_state *state = NULL;
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index 36facca37973..6f07b965e8da 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -163,7 +163,7 @@ void __cold btrfs_extent_state_free_cachep(void);
u64 btrfs_count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
- u64 max_bytes, u32 bits, int contig,
+ u64 max_bytes, u32 bits, bool contig,
struct extent_state **cached_state);
void btrfs_free_extent_state(struct extent_state *state);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 97d517cdf2df..dc4ca98c3780 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -325,7 +325,7 @@ search_again:
/*
* is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
- * is_data == BTRFS_REF_TYPE_DATA, data type is requiried,
+ * is_data == BTRFS_REF_TYPE_DATA, data type is required,
* is_data == BTRFS_REF_TYPE_ANY, either type is OK.
*/
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
@@ -879,7 +879,7 @@ again:
ptr += btrfs_extent_inline_ref_size(type);
continue;
}
- if (type == BTRFS_REF_TYPE_INVALID) {
+ if (unlikely(type == BTRFS_REF_TYPE_INVALID)) {
ret = -EUCLEAN;
goto out;
}
@@ -1210,7 +1210,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
* We're adding refs to a tree block we already own, this
* should not happen at all.
*/
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+ if (unlikely(owner < BTRFS_FIRST_FREE_OBJECTID)) {
btrfs_print_leaf(path->nodes[0]);
btrfs_crit(trans->fs_info,
"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u",
@@ -2157,7 +2157,7 @@ again:
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
ret = __btrfs_run_delayed_refs(trans, min_bytes);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -2355,7 +2355,7 @@ static noinline int check_committed_ref(struct btrfs_inode *inode,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
@@ -2457,7 +2457,7 @@ out:
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- int full_backref, int inc)
+ bool full_backref, bool inc)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 parent;
@@ -2543,15 +2543,15 @@ fail:
}
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref)
+ struct extent_buffer *buf, bool full_backref)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, true);
}
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref)
+ struct extent_buffer *buf, bool full_backref)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, false);
}
static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
@@ -2760,7 +2760,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
btrfs_put_block_group(cache);
total_unpinned = 0;
cache = btrfs_lookup_block_group(fs_info, start);
- if (cache == NULL) {
+ if (unlikely(cache == NULL)) {
/* Logic error, something removed the block group. */
ret = -EUCLEAN;
goto out;
@@ -2982,26 +2982,26 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
csum_root = btrfs_csum_root(trans->fs_info, bytenr);
ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_delete_raid_extent(trans, bytenr, num_bytes);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
}
ret = btrfs_record_squota_delta(trans->fs_info, delta);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_add_to_free_space_tree(trans, bytenr, num_bytes);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -3115,7 +3115,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
- if (!is_data && refs_to_drop != 1) {
+ if (unlikely(!is_data && refs_to_drop != 1)) {
btrfs_crit(info,
"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
node->bytenr, refs_to_drop);
@@ -3162,7 +3162,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
if (!found_extent) {
- if (iref) {
+ if (unlikely(iref)) {
abort_and_dump(trans, path,
"invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
path->slots[0]);
@@ -3172,7 +3172,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
/* Must be SHARED_* item, remove the backref first */
ret = remove_extent_backref(trans, extent_root, path,
NULL, refs_to_drop, is_data);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3221,7 +3221,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
"umm, got %d back from search, was looking for %llu, slot %d",
ret, bytenr, path->slots[0]);
}
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3254,7 +3254,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
key.type == BTRFS_EXTENT_ITEM_KEY) {
struct btrfs_tree_block_info *bi;
- if (item_size < sizeof(*ei) + sizeof(*bi)) {
+ if (unlikely(item_size < sizeof(*ei) + sizeof(*bi))) {
abort_and_dump(trans, path,
"invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu",
key.objectid, key.type, key.offset,
@@ -3268,7 +3268,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
refs = btrfs_extent_refs(leaf, ei);
- if (refs < refs_to_drop) {
+ if (unlikely(refs < refs_to_drop)) {
abort_and_dump(trans, path,
"trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
refs_to_drop, refs, bytenr, path->slots[0]);
@@ -3285,7 +3285,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
* be updated by remove_extent_backref
*/
if (iref) {
- if (!found_extent) {
+ if (unlikely(!found_extent)) {
abort_and_dump(trans, path,
"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
path->slots[0]);
@@ -3298,7 +3298,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (found_extent) {
ret = remove_extent_backref(trans, extent_root, path,
iref, refs_to_drop, is_data);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3314,8 +3314,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
/* In this branch refs == 1 */
if (found_extent) {
- if (is_data && refs_to_drop !=
- extent_data_ref_count(path, iref)) {
+ if (unlikely(is_data && refs_to_drop !=
+ extent_data_ref_count(path, iref))) {
abort_and_dump(trans, path,
"invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
extent_data_ref_count(path, iref),
@@ -3324,7 +3324,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
goto out;
}
if (iref) {
- if (path->slots[0] != extent_slot) {
+ if (unlikely(path->slots[0] != extent_slot)) {
abort_and_dump(trans, path,
"invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
key.objectid, key.type,
@@ -3339,7 +3339,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
* | extent_slot ||extent_slot + 1|
* [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
*/
- if (path->slots[0] != extent_slot + 1) {
+ if (unlikely(path->slots[0] != extent_slot + 1)) {
abort_and_dump(trans, path,
"invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
path->slots[0]);
@@ -3363,7 +3363,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -4297,7 +4297,8 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
}
static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
- struct find_free_extent_ctl *ffe_ctl)
+ struct find_free_extent_ctl *ffe_ctl,
+ struct btrfs_space_info *space_info)
{
if (ffe_ctl->for_treelog) {
spin_lock(&fs_info->treelog_bg_lock);
@@ -4315,12 +4316,13 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->zone_active_bgs_lock);
list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
/*
- * No lock is OK here because avail is monotinically
+ * No lock is OK here because avail is monotonically
* decreasing, and this is just a hint.
*/
u64 avail = block_group->zone_capacity - block_group->alloc_offset;
if (block_group_bits(block_group, ffe_ctl->flags) &&
+ block_group->space_info == space_info &&
avail >= ffe_ctl->num_bytes) {
ffe_ctl->hint_byte = block_group->start;
break;
@@ -4342,7 +4344,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
return prepare_allocation_clustered(fs_info, ffe_ctl,
space_info, ins);
case BTRFS_EXTENT_ALLOC_ZONED:
- return prepare_allocation_zoned(fs_info, ffe_ctl);
+ return prepare_allocation_zoned(fs_info, ffe_ctl, space_info);
default:
BUG();
}
@@ -5061,7 +5063,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (IS_ERR(buf))
return buf;
- if (check_eb_lock_owner(buf)) {
+ if (unlikely(check_eb_lock_owner(buf))) {
free_extent_buffer(buf);
return ERR_PTR(-EUCLEAN);
}
@@ -5470,17 +5472,17 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
if (!(wc->flags[level] & flag)) {
ASSERT(path->locks[level]);
ret = btrfs_inc_ref(trans, root, eb, 1);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_dec_ref(trans, root, eb, 0);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_set_disk_extent_flags(trans, eb, flag);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -5582,7 +5584,7 @@ static int check_next_block_uptodate(struct btrfs_trans_handle *trans,
generation = btrfs_node_ptr_generation(path->nodes[level], path->slots[level]);
- if (btrfs_buffer_uptodate(next, generation, 0))
+ if (btrfs_buffer_uptodate(next, generation, false))
return 0;
check.level = level - 1;
@@ -5611,7 +5613,7 @@ static int check_next_block_uptodate(struct btrfs_trans_handle *trans,
* If we are UPDATE_BACKREF then we will not, we need to update our backrefs.
*
* If we are DROP_REFERENCE this will figure out if we need to drop our current
- * reference, skipping it if we dropped it from a previous incompleted drop, or
+ * reference, skipping it if we dropped it from a previous uncompleted drop, or
* dropping it if we still have a reference to it.
*/
static int maybe_drop_reference(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -5636,7 +5638,7 @@ static int maybe_drop_reference(struct btrfs_trans_handle *trans, struct btrfs_r
ref.parent = path->nodes[level]->start;
} else {
ASSERT(btrfs_root_id(root) == btrfs_header_owner(path->nodes[level]));
- if (btrfs_root_id(root) != btrfs_header_owner(path->nodes[level])) {
+ if (unlikely(btrfs_root_id(root) != btrfs_header_owner(path->nodes[level]))) {
btrfs_err(root->fs_info, "mismatched block owner");
return -EIO;
}
@@ -5758,7 +5760,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
/*
* We have to walk down into this node, and if we're currently at the
- * DROP_REFERNCE stage and this block is shared then we need to switch
+ * DROP_REFERENCE stage and this block is shared then we need to switch
* to the UPDATE_BACKREF stage in order to convert to FULL_BACKREF.
*/
if (wc->stage == DROP_REFERENCE && wc->refs[level - 1] > 1) {
@@ -5772,7 +5774,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
level--;
ASSERT(level == btrfs_header_level(next));
- if (level != btrfs_header_level(next)) {
+ if (unlikely(level != btrfs_header_level(next))) {
btrfs_err(root->fs_info, "mismatched level");
ret = -EIO;
goto out_unlock;
@@ -5883,7 +5885,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
}
} else {
ret = btrfs_dec_ref(trans, root, eb, 0);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -5908,13 +5910,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (eb == root->node) {
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
parent = eb->start;
- else if (btrfs_root_id(root) != btrfs_header_owner(eb))
+ else if (unlikely(btrfs_root_id(root) != btrfs_header_owner(eb)))
goto owner_mismatch;
} else {
if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
parent = path->nodes[level + 1]->start;
- else if (btrfs_root_id(root) !=
- btrfs_header_owner(path->nodes[level + 1]))
+ else if (unlikely(btrfs_root_id(root) !=
+ btrfs_header_owner(path->nodes[level + 1])))
goto owner_mismatch;
}
@@ -6049,9 +6051,9 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
* also make sure backrefs for the shared block and all lower level
* blocks are properly updated.
*
- * If called with for_reloc == 0, may exit early with -EAGAIN
+ * If called with for_reloc == false, may exit early with -EAGAIN
*/
-int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
+int btrfs_drop_snapshot(struct btrfs_root *root, bool update_ref, bool for_reloc)
{
const bool is_reloc_root = (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID);
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6178,13 +6180,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
while (1) {
ret = walk_down_tree(trans, root, path, wc);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
break;
}
ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -6211,7 +6213,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
ret = btrfs_update_root(trans, tree_root,
&root->root_key,
root_item);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -6247,7 +6249,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
goto out_end_trans;
ret = btrfs_del_root(trans, &root->root_key);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -6255,7 +6257,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
if (!is_reloc_root) {
ret = btrfs_find_root(tree_root, &root->root_key, path,
NULL, NULL);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
} else if (ret > 0) {
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 82d3a82dc712..e970ac42a871 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -140,9 +140,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data, int delalloc);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref);
+ struct extent_buffer *buf, bool full_backref);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref);
+ struct extent_buffer *buf, bool full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, u64 flags);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
@@ -155,8 +155,7 @@ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans,
const struct extent_buffer *eb);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref);
-int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref,
- int for_reloc);
+int btrfs_drop_snapshot(struct btrfs_root *root, bool update_ref, bool for_reloc);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c953297aa89a..c123a3ef154a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -101,6 +101,26 @@ struct btrfs_bio_ctrl {
enum btrfs_compression_type compress_type;
u32 len_to_oe_boundary;
blk_opf_t opf;
+ /*
+ * For data read bios, we attempt to optimize csum lookups if the extent
+ * generation is older than the current one. To make this possible, we
+ * need to track the maximum generation of an extent in a bio_ctrl to
+ * make the decision when submitting the bio.
+ *
+ * The pattern between do_readpage(), submit_one_bio() and
+ * submit_extent_folio() is quite subtle, so tracking this is tricky.
+ *
+ * As we process extent E, we might submit a bio with existing built up
+ * extents before adding E to a new bio, or we might just add E to the
+ * bio. As a result, E's generation could apply to the current bio or
+ * to the next one, so we need to be careful to update the bio_ctrl's
+ * generation with E's only when we are sure E is added to bio_ctrl->bbio
+ * in submit_extent_folio().
+ *
+ * See the comment in btrfs_lookup_bio_sums() for more detail on the
+ * need for this optimization.
+ */
+ u64 generation;
btrfs_bio_end_io_t end_io_func;
struct writeback_control *wbc;
@@ -111,8 +131,46 @@ struct btrfs_bio_ctrl {
*/
unsigned long submit_bitmap;
struct readahead_control *ractl;
+
+ /*
+ * The start offset of the last used extent map by a read operation.
+ *
+ * This is for proper compressed read merge.
+ * U64_MAX means we are starting the read and have made no progress yet.
+ *
+ * The current btrfs_bio_is_contig() only uses disk_bytenr as
+ * the condition to check if the read can be merged with previous
+ * bio, which is not correct. E.g. two file extents pointing to the
+ * same extent but with different offset.
+ *
+ * So here we need to do extra checks to only merge reads that are
+ * covered by the same extent map.
+ * Just extent_map::start will be enough, as they are unique
+ * inside the same inode.
+ */
+ u64 last_em_start;
};
+/*
+ * Helper to set the csum search commit root option for a bio_ctrl's bbio
+ * before submitting the bio.
+ *
+ * Only for use by submit_one_bio().
+ */
+static void bio_set_csum_search_commit_root(struct btrfs_bio_ctrl *bio_ctrl)
+{
+ struct btrfs_bio *bbio = bio_ctrl->bbio;
+
+ ASSERT(bbio);
+
+ if (!(btrfs_op(&bbio->bio) == BTRFS_MAP_READ && is_data_inode(bbio->inode)))
+ return;
+
+ bio_ctrl->bbio->csum_search_commit_root =
+ (bio_ctrl->generation &&
+ bio_ctrl->generation < btrfs_get_fs_generation(bbio->inode->root->fs_info));
+}
+
static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
{
struct btrfs_bio *bbio = bio_ctrl->bbio;
@@ -123,6 +181,8 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
/* Caller should ensure the bio has at least some range added */
ASSERT(bbio->bio.bi_iter.bi_size);
+ bio_set_csum_search_commit_root(bio_ctrl);
+
if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ &&
bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
btrfs_submit_compressed_read(bbio);
@@ -131,6 +191,12 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
/* The bbio is owned by the end_io handler now */
bio_ctrl->bbio = NULL;
+ /*
+ * We used the generation to decide whether to lookup csums in the
+ * commit_root or not when we called bio_set_csum_search_commit_root()
+ * above. Now, reset the generation for the next bio.
+ */
+ bio_ctrl->generation = 0;
}
/*
@@ -327,6 +393,13 @@ again:
/* step one, find a bunch of delalloc bytes starting at start */
delalloc_start = *start;
delalloc_end = 0;
+
+ /*
+ * If @max_bytes is smaller than a block, btrfs_find_delalloc_range() can
+ * return early without handling any dirty ranges.
+ */
+ ASSERT(max_bytes >= fs_info->sectorsize);
+
found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
max_bytes, &cached_state);
if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
@@ -352,18 +425,19 @@ again:
if (delalloc_end + 1 - delalloc_start > max_bytes)
delalloc_end = delalloc_start + max_bytes - 1;
- /* step two, lock all the folioss after the folios that has start */
+ /* step two, lock all the folios after the folios that has start */
ret = lock_delalloc_folios(inode, locked_folio, delalloc_start,
delalloc_end);
ASSERT(!ret || ret == -EAGAIN);
if (ret == -EAGAIN) {
- /* some of the folios are gone, lets avoid looping by
- * shortening the size of the delalloc range we're searching
+ /*
+ * Some of the folios are gone, lets avoid looping by
+ * shortening the size of the delalloc range we're searching.
*/
btrfs_free_extent_state(cached_state);
cached_state = NULL;
if (!loops) {
- max_bytes = PAGE_SIZE;
+ max_bytes = fs_info->sectorsize;
loops = 1;
goto again;
} else {
@@ -552,6 +626,7 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
* Populate every free slot in a provided array with folios using GFP_NOFS.
*
* @nr_folios: number of folios to allocate
+ * @order: the order of the folios to be allocated
* @folio_array: the array to fill with folios; any existing non-NULL entries in
* the array will be skipped
*
@@ -559,12 +634,13 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
* -ENOMEM otherwise, the partially allocated folios would be freed and
* the array slots zeroed
*/
-int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array)
+int btrfs_alloc_folio_array(unsigned int nr_folios, unsigned int order,
+ struct folio **folio_array)
{
for (int i = 0; i < nr_folios; i++) {
if (folio_array[i])
continue;
- folio_array[i] = folio_alloc(GFP_NOFS, 0);
+ folio_array[i] = folio_alloc(GFP_NOFS, order);
if (!folio_array[i])
goto error;
}
@@ -573,6 +649,7 @@ error:
for (int i = 0; i < nr_folios; i++) {
if (folio_array[i])
folio_put(folio_array[i]);
+ folio_array[i] = NULL;
}
return -ENOMEM;
}
@@ -701,15 +778,18 @@ static void alloc_new_bio(struct btrfs_inode *inode,
* @size: portion of page that we want to write to
* @pg_offset: offset of the new bio or to check whether we are adding
* a contiguous page to the previous one
+ * @read_em_generation: generation of the extent_map we are submitting
+ * (only used for read)
*
* The will either add the page into the existing @bio_ctrl->bbio, or allocate a
* new one in @bio_ctrl->bbio.
- * The mirror number for this IO should already be initizlied in
+ * The mirror number for this IO should already be initialized in
* @bio_ctrl->mirror_num.
*/
static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
u64 disk_bytenr, struct folio *folio,
- size_t size, unsigned long pg_offset)
+ size_t size, unsigned long pg_offset,
+ u64 read_em_generation)
{
struct btrfs_inode *inode = folio_to_inode(folio);
loff_t file_offset = folio_pos(folio) + pg_offset;
@@ -740,6 +820,11 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
submit_one_bio(bio_ctrl);
continue;
}
+ /*
+ * Now that the folio is definitely added to the bio, include its
+ * generation in the max generation calculation.
+ */
+ bio_ctrl->generation = max(bio_ctrl->generation, read_em_generation);
bio_ctrl->next_file_offset += len;
if (bio_ctrl->wbc)
@@ -909,7 +994,7 @@ static void btrfs_readahead_expand(struct readahead_control *ractl,
* return 0 on success, otherwise return error
*/
static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
- struct btrfs_bio_ctrl *bio_ctrl, u64 *prev_em_start)
+ struct btrfs_bio_ctrl *bio_ctrl)
{
struct inode *inode = folio->mapping->host;
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
@@ -942,6 +1027,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
bool force_bio_submit = false;
u64 disk_bytenr;
u64 block_start;
+ u64 em_gen;
ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
if (cur >= last_byte) {
@@ -1019,13 +1105,13 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
* non-optimal behavior (submitting 2 bios for the same extent).
*/
if (compress_type != BTRFS_COMPRESS_NONE &&
- prev_em_start && *prev_em_start != (u64)-1 &&
- *prev_em_start != em->start)
+ bio_ctrl->last_em_start != U64_MAX &&
+ bio_ctrl->last_em_start != em->start)
force_bio_submit = true;
- if (prev_em_start)
- *prev_em_start = em->start;
+ bio_ctrl->last_em_start = em->start;
+ em_gen = em->generation;
btrfs_free_extent_map(em);
em = NULL;
@@ -1049,7 +1135,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
if (force_bio_submit)
submit_one_bio(bio_ctrl);
submit_extent_folio(bio_ctrl, disk_bytenr, folio, blocksize,
- pg_offset);
+ pg_offset, em_gen);
}
return 0;
}
@@ -1238,12 +1324,15 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
const u64 start = folio_pos(folio);
const u64 end = start + folio_size(folio) - 1;
struct extent_state *cached_state = NULL;
- struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ };
+ struct btrfs_bio_ctrl bio_ctrl = {
+ .opf = REQ_OP_READ,
+ .last_em_start = U64_MAX,
+ };
struct extent_map *em_cached = NULL;
int ret;
lock_extents_for_read(inode, start, end, &cached_state);
- ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL);
+ ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl);
btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state);
btrfs_free_extent_map(em_cached);
@@ -1580,7 +1669,7 @@ static int submit_one_sector(struct btrfs_inode *inode,
ASSERT(folio_test_writeback(folio));
submit_extent_folio(bio_ctrl, disk_bytenr, folio,
- sectorsize, filepos - folio_pos(folio));
+ sectorsize, filepos - folio_pos(folio), 0);
return 0;
}
@@ -1601,7 +1690,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long range_bitmap = 0;
bool submitted_io = false;
- bool error = false;
+ int found_error = 0;
const u64 folio_start = folio_pos(folio);
const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
u64 cur;
@@ -1665,7 +1754,8 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
*/
btrfs_mark_ordered_io_finished(inode, folio, cur,
fs_info->sectorsize, false);
- error = true;
+ if (!found_error)
+ found_error = ret;
continue;
}
submitted_io = true;
@@ -1682,11 +1772,11 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
* If we hit any error, the corresponding sector will have its dirty
* flag cleared and writeback finished, thus no need to handle the error case.
*/
- if (!submitted_io && !error) {
+ if (!submitted_io && !found_error) {
btrfs_folio_set_writeback(fs_info, folio, start, len);
btrfs_folio_clear_writeback(fs_info, folio, start, len);
}
- return ret;
+ return found_error;
}
/*
@@ -2147,7 +2237,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
* @fs_info: The fs_info for this file system.
* @start: The offset of the range to start waiting on writeback.
* @end: The end of the range, inclusive. This is meant to be used in
- * conjuction with wait_marked_extents, so this will usually be
+ * conjunction with wait_marked_extents, so this will usually be
* the_next_eb->start - 1.
*/
void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start,
@@ -2417,7 +2507,7 @@ retry:
* In above case, [32K, 96K) is asynchronously submitted
* for compression, and [124K, 128K) needs to be written back.
*
- * If we didn't wait wrtiteback for page 64K, [128K, 128K)
+ * If we didn't wait writeback for page 64K, [128K, 128K)
* won't be submitted as the page still has writeback flag
* and will be skipped in the next check.
*
@@ -2583,7 +2673,8 @@ void btrfs_readahead(struct readahead_control *rac)
{
struct btrfs_bio_ctrl bio_ctrl = {
.opf = REQ_OP_READ | REQ_RAHEAD,
- .ractl = rac
+ .ractl = rac,
+ .last_em_start = U64_MAX,
};
struct folio *folio;
struct btrfs_inode *inode = BTRFS_I(rac->mapping->host);
@@ -2591,12 +2682,11 @@ void btrfs_readahead(struct readahead_control *rac)
const u64 end = start + readahead_length(rac) - 1;
struct extent_state *cached_state = NULL;
struct extent_map *em_cached = NULL;
- u64 prev_em_start = (u64)-1;
lock_extents_for_read(inode, start, end, &cached_state);
while ((folio = readahead_folio(rac)) != NULL)
- btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start);
+ btrfs_do_readpage(folio, &em_cached, &bio_ctrl);
btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state);
@@ -2901,7 +2991,7 @@ static void cleanup_extent_buffer_folios(struct extent_buffer *eb)
{
const int num_folios = num_extent_folios(eb);
- /* We canont use num_extent_folios() as loop bound as eb->folios changes. */
+ /* We cannot use num_extent_folios() as loop bound as eb->folios changes. */
for (int i = 0; i < num_folios; i++) {
ASSERT(eb->folios[i]);
detach_extent_buffer_folio(eb, eb->folios[i]);
@@ -3148,29 +3238,30 @@ static struct extent_buffer *grab_extent_buffer(struct btrfs_fs_info *fs_info,
*/
static bool check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
{
- if (!IS_ALIGNED(start, fs_info->sectorsize)) {
+ const u32 nodesize = fs_info->nodesize;
+
+ if (unlikely(!IS_ALIGNED(start, fs_info->sectorsize))) {
btrfs_err(fs_info, "bad tree block start %llu", start);
return true;
}
- if (fs_info->nodesize < PAGE_SIZE && !IS_ALIGNED(start, fs_info->nodesize)) {
+ if (unlikely(nodesize < PAGE_SIZE && !IS_ALIGNED(start, nodesize))) {
btrfs_err(fs_info,
"tree block is not nodesize aligned, start %llu nodesize %u",
- start, fs_info->nodesize);
+ start, nodesize);
return true;
}
- if (fs_info->nodesize >= PAGE_SIZE &&
- !PAGE_ALIGNED(start)) {
+ if (unlikely(nodesize >= PAGE_SIZE && !PAGE_ALIGNED(start))) {
btrfs_err(fs_info,
"tree block is not page aligned, start %llu nodesize %u",
- start, fs_info->nodesize);
+ start, nodesize);
return true;
}
- if (!IS_ALIGNED(start, fs_info->nodesize) &&
- !test_and_set_bit(BTRFS_FS_UNALIGNED_TREE_BLOCK, &fs_info->flags)) {
+ if (unlikely(!IS_ALIGNED(start, nodesize) &&
+ !test_and_set_bit(BTRFS_FS_UNALIGNED_TREE_BLOCK, &fs_info->flags))) {
btrfs_warn(fs_info,
"tree block not nodesize aligned, start %llu nodesize %u, can be resolved by a full metadata balance",
- start, fs_info->nodesize);
+ start, nodesize);
}
return false;
}
@@ -3789,7 +3880,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
return ret;
wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING, TASK_UNINTERRUPTIBLE);
- if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
+ if (unlikely(!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)))
return -EIO;
return 0;
}
@@ -4465,7 +4556,7 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
if (IS_ERR(eb))
return;
- if (btrfs_buffer_uptodate(eb, gen, 1)) {
+ if (btrfs_buffer_uptodate(eb, gen, true)) {
free_extent_buffer(eb);
return;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 61130786b9a3..5fcbfe44218c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -366,7 +366,8 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
bool nofail);
-int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array);
+int btrfs_alloc_folio_array(unsigned int nr_folios, unsigned int order,
+ struct folio **folio_array);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 57f52585a6dd..7e38c23a0c1c 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -460,7 +460,7 @@ void btrfs_clear_em_logging(struct btrfs_inode *inode, struct extent_map *em)
static inline void setup_extent_mapping(struct btrfs_inode *inode,
struct extent_map *em,
- int modified)
+ bool modified)
{
refcount_inc(&em->refs);
@@ -486,7 +486,7 @@ static inline void setup_extent_mapping(struct btrfs_inode *inode,
* taken, or a reference dropped if the merge attempt was successful.
*/
static int add_extent_mapping(struct btrfs_inode *inode,
- struct extent_map *em, int modified)
+ struct extent_map *em, bool modified)
{
struct extent_map_tree *tree = &inode->extent_tree;
struct btrfs_root *root = inode->root;
@@ -509,7 +509,7 @@ static int add_extent_mapping(struct btrfs_inode *inode,
}
static struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len, int strict)
+ u64 start, u64 len, bool strict)
{
struct extent_map *em;
struct rb_node *rb_node;
@@ -548,7 +548,7 @@ static struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
struct extent_map *btrfs_lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len)
{
- return lookup_extent_mapping(tree, start, len, 1);
+ return lookup_extent_mapping(tree, start, len, true);
}
/*
@@ -566,7 +566,7 @@ struct extent_map *btrfs_lookup_extent_mapping(struct extent_map_tree *tree,
struct extent_map *btrfs_search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len)
{
- return lookup_extent_mapping(tree, start, len, 0);
+ return lookup_extent_mapping(tree, start, len, false);
}
/*
@@ -594,7 +594,7 @@ void btrfs_remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *e
static void replace_extent_mapping(struct btrfs_inode *inode,
struct extent_map *cur,
struct extent_map *new,
- int modified)
+ bool modified)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_map_tree *tree = &inode->extent_tree;
@@ -670,7 +670,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
em->len = end - start;
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
em->offset += start_diff;
- return add_extent_mapping(inode, em, 0);
+ return add_extent_mapping(inode, em, false);
}
/*
@@ -707,7 +707,7 @@ int btrfs_add_extent_mapping(struct btrfs_inode *inode,
if (em->disk_bytenr == EXTENT_MAP_INLINE)
ASSERT(em->start == 0);
- ret = add_extent_mapping(inode, em, 0);
+ ret = add_extent_mapping(inode, em, false);
/* it is possible that someone inserted the extent into the tree
* while we had the lock dropped. It is also possible that
* an overlapping map exists in the tree
@@ -1057,7 +1057,7 @@ int btrfs_split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pr
btrfs_lock_extent(&inode->io_tree, start, start + len - 1, NULL);
write_lock(&em_tree->lock);
em = btrfs_lookup_extent_mapping(em_tree, start, len);
- if (!em) {
+ if (unlikely(!em)) {
ret = -EIO;
goto out_unlock;
}
@@ -1082,7 +1082,7 @@ int btrfs_split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pr
split_pre->flags = flags;
split_pre->generation = em->generation;
- replace_extent_mapping(inode, em, split_pre, 1);
+ replace_extent_mapping(inode, em, split_pre, true);
/*
* Now we only have an extent_map at:
@@ -1098,7 +1098,7 @@ int btrfs_split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pr
split_mid->ram_bytes = split_mid->len;
split_mid->flags = flags;
split_mid->generation = em->generation;
- add_extent_mapping(inode, split_mid, 1);
+ add_extent_mapping(inode, split_mid, true);
/* Once for us */
btrfs_free_extent_map(em);
@@ -1372,7 +1372,7 @@ void btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
if (atomic64_cmpxchg(&fs_info->em_shrinker_nr_to_scan, 0, nr_to_scan) != 0)
return;
- queue_work(system_unbound_wq, &fs_info->em_shrinker_work);
+ queue_work(system_dfl_wq, &fs_info->em_shrinker_work);
}
void btrfs_init_extent_map_shrinker_work(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/fiemap.c b/fs/btrfs/fiemap.c
index 7935586a9dbd..f2eaaef8422b 100644
--- a/fs/btrfs/fiemap.c
+++ b/fs/btrfs/fiemap.c
@@ -153,7 +153,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
if (cache_end > offset) {
if (offset == cache->offset) {
/*
- * We cached a dealloc range (found in the io tree) for
+ * We cached a delalloc range (found in the io tree) for
* a hole or prealloc extent and we have now found a
* file extent item for the same offset. What we have
* now is more recent and up to date, so discard what
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index c09fbc257634..a42e6d54e7cd 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -397,6 +397,36 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
path->skip_locking = 1;
}
+ /*
+ * If we are searching for a csum of an extent from a past
+ * transaction, we can search in the commit root and reduce
+ * lock contention on the csum tree extent buffers.
+ *
+ * This is important because that lock is an rwsem which gets
+ * pretty heavy write load under memory pressure and sustained
+ * csum overwrites, unlike the commit_root_sem. (Memory pressure
+ * makes us writeback the nodes multiple times per transaction,
+ * which makes us cow them each time, taking the write lock.)
+ *
+ * Due to how rwsem is implemented, there is a possible
+ * priority inversion where the readers holding the lock don't
+ * get scheduled (say they're in a cgroup stuck in heavy reclaim)
+ * which then blocks writers, including transaction commit. By
+ * using a semaphore with fewer writers (only a commit switching
+ * the roots), we make this issue less likely.
+ *
+ * Note that we don't rely on btrfs_search_slot to lock the
+ * commit root csum. We call search_slot multiple times, which would
+ * create a potential race where a commit comes in between searches
+ * while we are not holding the commit_root_sem, and we get csums
+ * from across transactions.
+ */
+ if (bbio->csum_search_commit_root) {
+ path->search_commit_root = 1;
+ path->skip_locking = 1;
+ down_read(&fs_info->commit_root_sem);
+ }
+
while (bio_offset < orig_len) {
int count;
u64 cur_disk_bytenr = orig_disk_bytenr + bio_offset;
@@ -442,6 +472,8 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
bio_offset += count * sectorsize;
}
+ if (bbio->csum_search_commit_root)
+ up_read(&fs_info->commit_root_sem);
return ret;
}
@@ -743,12 +775,10 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio)
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct bio *bio = &bbio->bio;
struct btrfs_ordered_sum *sums;
- char *data;
- struct bvec_iter iter;
- struct bio_vec bvec;
+ struct bvec_iter iter = bio->bi_iter;
+ phys_addr_t paddr;
+ const u32 blocksize = fs_info->sectorsize;
int index;
- unsigned int blockcount;
- int i;
unsigned nofs_flag;
nofs_flag = memalloc_nofs_save();
@@ -767,21 +797,9 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio)
shash->tfm = fs_info->csum_shash;
- bio_for_each_segment(bvec, bio, iter) {
- blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
- bvec.bv_len + fs_info->sectorsize
- - 1);
-
- for (i = 0; i < blockcount; i++) {
- data = bvec_kmap_local(&bvec);
- crypto_shash_digest(shash,
- data + (i * fs_info->sectorsize),
- fs_info->sectorsize,
- sums->sums + index);
- kunmap_local(data);
- index += fs_info->csum_size;
- }
-
+ btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) {
+ btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index);
+ index += fs_info->csum_size;
}
bbio->sums = sums;
@@ -993,7 +1011,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
* item changed size or key
*/
ret = btrfs_split_item(trans, root, path, &key, offset);
- if (ret && ret != -EAGAIN) {
+ if (unlikely(ret && ret != -EAGAIN)) {
btrfs_abort_transaction(trans, ret);
break;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 204674934795..7efd1f8a1912 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -327,7 +327,7 @@ next_slot:
args->start - extent_offset,
0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -426,7 +426,7 @@ delete_extent_item:
key.offset - extent_offset,
0, false);
ret = btrfs_free_extent(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -443,7 +443,7 @@ delete_extent_item:
ret = btrfs_del_items(trans, root, path, del_slot,
del_nr);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -587,21 +587,20 @@ again:
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid != ino ||
- key.type != BTRFS_EXTENT_DATA_KEY) {
+ if (unlikely(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
}
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC) {
+ if (unlikely(btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
}
extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
- if (key.offset > start || extent_end < end) {
+ if (unlikely(key.offset > start || extent_end < end)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
@@ -676,7 +675,7 @@ again:
btrfs_release_path(path);
goto again;
}
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -704,7 +703,7 @@ again:
ref.ref_root = btrfs_root_id(root);
btrfs_init_data_ref(&ref, ino, orig_offset, 0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -712,7 +711,7 @@ again:
if (split == start) {
key.offset = start;
} else {
- if (start != key.offset) {
+ if (unlikely(start != key.offset)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
@@ -744,7 +743,7 @@ again:
del_slot = path->slots[0] + 1;
del_nr++;
ret = btrfs_free_extent(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -762,7 +761,7 @@ again:
del_slot = path->slots[0];
del_nr++;
ret = btrfs_free_extent(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -783,7 +782,7 @@ again:
extent_end - key.offset);
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -815,7 +814,7 @@ static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64
if (ret)
return ret;
folio_lock(folio);
- if (!folio_test_uptodate(folio)) {
+ if (unlikely(!folio_test_uptodate(folio))) {
folio_unlock(folio);
return -EIO;
}
@@ -970,7 +969,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
* Return:
* > 0 If we can nocow, and updates @write_bytes.
* 0 If we can't do a nocow write.
- * -EAGAIN If we can't do a nocow write because snapshoting of the inode's
+ * -EAGAIN If we can't do a nocow write because snapshotting of the inode's
* root is in progress or because we are in a non-blocking IO
* context and need to block (@nowait is true).
* < 0 If an error happened.
@@ -2460,9 +2459,9 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
* got EOPNOTSUPP via prealloc then we messed up and
* need to abort.
*/
- if (ret &&
- (ret != -EOPNOTSUPP ||
- (extent_info && extent_info->is_new_extent)))
+ if (unlikely(ret &&
+ (ret != -EOPNOTSUPP ||
+ (extent_info && extent_info->is_new_extent))))
btrfs_abort_transaction(trans, ret);
break;
}
@@ -2473,7 +2472,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
cur_offset < ino_size) {
ret = fill_holes(trans, inode, path, cur_offset,
drop_args.drop_end);
- if (ret) {
+ if (unlikely(ret)) {
/*
* If we failed then we didn't insert our hole
* entries for the area we dropped, so now the
@@ -2493,7 +2492,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
ret = btrfs_inode_clear_file_extent_range(inode,
cur_offset,
drop_args.drop_end - cur_offset);
- if (ret) {
+ if (unlikely(ret)) {
/*
* We couldn't clear our area, so we could
* presumably adjust up and corrupt the fs, so
@@ -2512,7 +2511,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
ret = btrfs_insert_replace_extent(trans, inode, path,
extent_info, replace_len,
drop_args.bytes_found);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -2607,7 +2606,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
cur_offset < drop_args.drop_end) {
ret = fill_holes(trans, inode, path, cur_offset,
drop_args.drop_end);
- if (ret) {
+ if (unlikely(ret)) {
/* Same comment as above. */
btrfs_abort_transaction(trans, ret);
goto out_trans;
@@ -2616,7 +2615,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
/* See the comment in the loop above for the reasoning here. */
ret = btrfs_inode_clear_file_extent_range(inode, cur_offset,
drop_args.drop_end - cur_offset);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_trans;
}
@@ -2626,7 +2625,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
ret = btrfs_insert_replace_extent(trans, inode, path,
extent_info, extent_info->data_len,
drop_args.bytes_found);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_trans;
}
@@ -3345,7 +3344,7 @@ static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end
* We could also use the extent map tree to find such delalloc that is
* being flushed, but using the ordered extents tree is more efficient
* because it's usually much smaller as ordered extents are removed from
- * the tree once they complete. With the extent maps, we mau have them
+ * the tree once they complete. With the extent maps, we may have them
* in the extent map tree for a very long time, and they were either
* created by previous writes or loaded by read operations.
*/
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5d8d1570a5c9..ab873bd67192 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2282,7 +2282,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
* If this block group has some small extents we don't want to
* use up all of our free slots in the cache with them, we want
* to reserve them to larger extents, however if we have plenty
- * of cache left then go ahead an dadd them, no sense in adding
+ * of cache left then go ahead and add them, no sense in adding
* the overhead of a bitmap if we don't have to.
*/
if (info->bytes <= fs_info->sectorsize * 8) {
@@ -3829,7 +3829,7 @@ out_unlock:
/*
* If we break out of trimming a bitmap prematurely, we should reset the
- * trimming bit. In a rather contrieved case, it's possible to race here so
+ * trimming bit. In a rather contrived case, it's possible to race here so
* reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
*
* start = start of bitmap
@@ -4142,7 +4142,7 @@ int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool act
if (!active) {
set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
ret = cleanup_free_space_cache_v1(fs_info, trans);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out;
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index eba7f22ae49c..dad0b492a663 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -137,12 +137,12 @@ static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
if (ret < 0)
return ret;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
DEBUG_WARN();
return -EIO;
}
- if (p->slots[0] == 0) {
+ if (unlikely(p->slots[0] == 0)) {
DEBUG_WARN("no previous slot found");
return -EIO;
}
@@ -218,7 +218,7 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
bitmap = alloc_bitmap(bitmap_size);
- if (!bitmap) {
+ if (unlikely(!bitmap)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
@@ -233,7 +233,7 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -271,7 +271,7 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
}
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -293,7 +293,7 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
expected_extent_count = btrfs_free_space_extent_count(leaf, info);
btrfs_release_path(path);
- if (extent_count != expected_extent_count) {
+ if (unlikely(extent_count != expected_extent_count)) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
@@ -320,7 +320,7 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, root, path, &key,
data_size);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -361,7 +361,7 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
bitmap = alloc_bitmap(bitmap_size);
- if (!bitmap) {
+ if (unlikely(!bitmap)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
@@ -376,7 +376,7 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -420,7 +420,7 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
}
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -454,7 +454,7 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
key.offset = (end_bit - start_bit) * fs_info->sectorsize;
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -465,7 +465,7 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
}
- if (extent_count != expected_extent_count) {
+ if (unlikely(extent_count != expected_extent_count)) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
@@ -848,14 +848,14 @@ int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
return 0;
path = btrfs_alloc_path();
- if (!path) {
+ if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
}
block_group = btrfs_lookup_block_group(trans->fs_info, start);
- if (!block_group) {
+ if (unlikely(!block_group)) {
DEBUG_WARN("no block group found for start=%llu", start);
ret = -ENOENT;
btrfs_abort_transaction(trans, ret);
@@ -1030,14 +1030,14 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
return 0;
path = btrfs_alloc_path();
- if (!path) {
+ if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
}
block_group = btrfs_lookup_block_group(trans->fs_info, start);
- if (!block_group) {
+ if (unlikely(!block_group)) {
DEBUG_WARN("no block group found for start=%llu", start);
ret = -ENOENT;
btrfs_abort_transaction(trans, ret);
@@ -1185,7 +1185,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
goto out_clear;
}
ret = btrfs_global_root_insert(free_space_root);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_put_root(free_space_root);
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
@@ -1197,7 +1197,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
ret = populate_free_space_tree(trans, block_group);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out_clear;
@@ -1290,14 +1290,14 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
ret = clear_free_space_tree(trans, free_space_root);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
ret = btrfs_del_root(trans, &free_space_root->root_key);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@@ -1315,7 +1315,7 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
free_space_root->node, 0, 1);
btrfs_put_root(free_space_root);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@@ -1344,7 +1344,7 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
ret = clear_free_space_tree(trans, free_space_root);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@@ -1362,7 +1362,7 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
goto next;
ret = populate_free_space_tree(trans, block_group);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@@ -1422,7 +1422,7 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
if (!path) {
path = btrfs_alloc_path();
- if (!path) {
+ if (unlikely(!path)) {
btrfs_abort_transaction(trans, -ENOMEM);
return -ENOMEM;
}
@@ -1430,7 +1430,7 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
}
ret = add_new_free_space_info(trans, block_group, path);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1481,7 +1481,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
}
path = btrfs_alloc_path();
- if (!path) {
+ if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
@@ -1496,7 +1496,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1527,7 +1527,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
}
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1611,7 +1611,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
extent_count++;
}
- if (extent_count != expected_extent_count) {
+ if (unlikely(extent_count != expected_extent_count)) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
@@ -1672,7 +1672,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
extent_count++;
}
- if (extent_count != expected_extent_count) {
+ if (unlikely(extent_count != expected_extent_count)) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
diff --git a/fs/btrfs/fs.c b/fs/btrfs/fs.c
index b2bb86f8d7cf..feb0a2faa837 100644
--- a/fs/btrfs/fs.c
+++ b/fs/btrfs/fs.c
@@ -55,6 +55,54 @@ size_t __attribute_const__ btrfs_get_num_csums(void)
}
/*
+ * We support the following block sizes for all systems:
+ *
+ * - 4K
+ * This is the most common block size. For PAGE_SIZE > 4K cases the subpage
+ * mode is used.
+ *
+ * - PAGE_SIZE
+ * The straightforward block size to support.
+ *
+ * And extra support for the following block sizes based on the kernel config:
+ *
+ * - MIN_BLOCKSIZE
+ * This is either 4K (regular builds) or 2K (debug builds)
+ * This allows testing subpage routines on x86_64.
+ */
+bool __attribute_const__ btrfs_supported_blocksize(u32 blocksize)
+{
+ /* @blocksize should be validated first. */
+ ASSERT(is_power_of_2(blocksize) && blocksize >= BTRFS_MIN_BLOCKSIZE &&
+ blocksize <= BTRFS_MAX_BLOCKSIZE);
+
+ if (blocksize == PAGE_SIZE || blocksize == SZ_4K || blocksize == BTRFS_MIN_BLOCKSIZE)
+ return true;
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ /*
+ * Support for bs > ps is done by specifying a minimal folio order
+ * for filemap, thus implying large data folios.
+ * For HIGHMEM systems, we can not always access the content of a (large)
+ * folio in one go, but have to go through it page by page.
+ *
+ * A lot of features don't implement a proper PAGE sized loop for large
+ * folios, this includes:
+ *
+ * - compression
+ * - verity
+ * - encoded write
+ *
+ * Considering HIGHMEM is such a pain to deal with and it's going
+ * to be deprecated eventually, just reject HIGHMEM && bs > ps cases.
+ */
+ if (IS_ENABLED(CONFIG_HIGHMEM) && blocksize > PAGE_SIZE)
+ return false;
+ return true;
+#endif
+ return false;
+}
+
+/*
* Start exclusive operation @type, return true on success.
*/
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 8cc07cc70b12..814bbc9417d2 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -59,6 +59,8 @@ struct btrfs_space_info;
#define BTRFS_MIN_BLOCKSIZE (SZ_4K)
#endif
+#define BTRFS_MAX_BLOCKSIZE (SZ_64K)
+
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
#define BTRFS_OLDEST_GENERATION 0ULL
@@ -102,6 +104,8 @@ enum {
BTRFS_FS_STATE_RO,
/* Track if a transaction abort has been reported on this filesystem */
BTRFS_FS_STATE_TRANS_ABORTED,
+ /* Track if log replay has failed. */
+ BTRFS_FS_STATE_LOG_REPLAY_ABORTED,
/*
* Bio operations should be blocked on this filesystem because a source
* or target device is being destroyed as part of a device replace
@@ -243,6 +247,7 @@ enum {
BTRFS_MOUNT_NOSPACECACHE = (1ULL << 30),
BTRFS_MOUNT_IGNOREMETACSUMS = (1ULL << 31),
BTRFS_MOUNT_IGNORESUPERFLAGS = (1ULL << 32),
+ BTRFS_MOUNT_REF_TRACKER = (1ULL << 33),
};
/*
@@ -280,7 +285,7 @@ enum {
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/*
- * Features under developmen like Extent tree v2 support is enabled
+ * Features under development, like Extent tree v2 support, are enabled
* only under CONFIG_BTRFS_EXPERIMENTAL
*/
#define BTRFS_FEATURE_INCOMPAT_SUPP \
@@ -303,6 +308,16 @@ enum {
#define BTRFS_WARNING_COMMIT_INTERVAL (300)
#define BTRFS_DEFAULT_MAX_INLINE (2048)
+enum btrfs_compression_type {
+ BTRFS_COMPRESS_NONE = 0,
+ BTRFS_COMPRESS_ZLIB = 1,
+ BTRFS_COMPRESS_LZO = 2,
+ BTRFS_COMPRESS_ZSTD = 3,
+ BTRFS_NR_COMPRESS_TYPES = 4,
+
+ BTRFS_DEFRAG_DONT_COMPRESS,
+};
+
struct btrfs_dev_replace {
/* See #define above */
u64 replace_state;
@@ -505,6 +520,9 @@ struct btrfs_fs_info {
u64 last_trans_log_full_commit;
unsigned long long mount_opt;
+ /* Compress related structures. */
+ void *compr_wsm[BTRFS_NR_COMPRESS_TYPES];
+
int compress_type;
int compress_level;
u32 commit_interval;
@@ -809,6 +827,8 @@ struct btrfs_fs_info {
u32 sectorsize;
/* ilog2 of sectorsize, use to avoid 64bit division */
u32 sectorsize_bits;
+ u32 block_min_order;
+ u32 block_max_order;
u32 csum_size;
u32 csums_per_leaf;
u32 stripesize;
@@ -878,12 +898,10 @@ struct btrfs_fs_info {
struct lockdep_map btrfs_trans_pending_ordered_map;
struct lockdep_map btrfs_ordered_extent_map;
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+#ifdef CONFIG_BTRFS_DEBUG
spinlock_t ref_verify_lock;
struct rb_root block_tree;
-#endif
-#ifdef CONFIG_BTRFS_DEBUG
struct kobject *debug_kobj;
struct list_head allocated_roots;
@@ -905,6 +923,12 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
return mapping_gfp_constraint(mapping, ~__GFP_FS);
}
+/* Return the minimal folio size of the fs. */
+static inline unsigned int btrfs_min_folio_size(struct btrfs_fs_info *fs_info)
+{
+ return 1U << (PAGE_SHIFT + fs_info->block_min_order);
+}
+
static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info)
{
return READ_ONCE(fs_info->generation);
@@ -997,6 +1021,7 @@ static inline unsigned int btrfs_blocks_per_folio(const struct btrfs_fs_info *fs
return folio_size(folio) >> fs_info->sectorsize_bits;
}
+bool __attribute_const__ btrfs_supported_blocksize(u32 blocksize);
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation type);
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
@@ -1107,9 +1132,9 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
#define EXPORT_FOR_TESTS
-static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info)
+static inline bool btrfs_is_testing(const struct btrfs_fs_info *fs_info)
{
- return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
+ return unlikely(test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state));
}
void btrfs_test_destroy_inode(struct inode *inode);
@@ -1118,9 +1143,9 @@ void btrfs_test_destroy_inode(struct inode *inode);
#define EXPORT_FOR_TESTS static
-static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info)
+static inline bool btrfs_is_testing(const struct btrfs_fs_info *fs_info)
{
- return 0;
+ return false;
}
#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index f06cf701ae5a..1bd73b80f9fa 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -137,7 +137,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
*/
extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
ref_objectid, name);
- if (!extref) {
+ if (unlikely(!extref)) {
btrfs_abort_transaction(trans, -ENOENT);
return -ENOENT;
}
@@ -627,7 +627,7 @@ delete:
if (control->clear_extent_range) {
ret = btrfs_inode_clear_file_extent_range(control->inode,
clear_start, clear_len);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -666,7 +666,7 @@ delete:
btrfs_init_data_ref(&ref, control->ino, extent_offset,
btrfs_root_id(root), false);
ret = btrfs_free_extent(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -684,7 +684,7 @@ delete:
ret = btrfs_del_items(trans, root, path,
pending_del_slot,
pending_del_nr);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -720,7 +720,7 @@ out:
int ret2;
ret2 = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr);
- if (ret2) {
+ if (unlikely(ret2)) {
btrfs_abort_transaction(trans, ret2);
ret = ret2;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dd82dcc7b2b7..ced87c9e4682 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -72,6 +72,9 @@
#include "raid-stripe-tree.h"
#include "fiemap.h"
+#define COW_FILE_RANGE_KEEP_LOCKED (1UL << 0)
+#define COW_FILE_RANGE_NO_INLINE (1UL << 1)
+
struct btrfs_iget_args {
u64 ino;
struct btrfs_root *root;
@@ -367,7 +370,7 @@ int btrfs_inode_lock(struct btrfs_inode *inode, unsigned int ilock_flags)
}
/*
- * Unock inode i_rwsem.
+ * Unlock inode i_rwsem.
*
* ilock_flags should contain the same bits set as passed to btrfs_inode_lock()
* to decide whether the lock acquired is shared or exclusive.
@@ -631,7 +634,7 @@ static noinline int __cow_file_range_inline(struct btrfs_inode *inode,
drop_args.replace_extent = true;
drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(data_len);
ret = btrfs_drop_extents(trans, root, inode, &drop_args);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -639,7 +642,7 @@ static noinline int __cow_file_range_inline(struct btrfs_inode *inode,
ret = insert_inline_extent(trans, path, inode, drop_args.extent_inserted,
size, compressed_size, compress_type,
compressed_folio, update_i_size);
- if (ret && ret != -ENOSPC) {
+ if (unlikely(ret && ret != -ENOSPC)) {
btrfs_abort_transaction(trans, ret);
goto out;
} else if (ret == -ENOSPC) {
@@ -649,7 +652,7 @@ static noinline int __cow_file_range_inline(struct btrfs_inode *inode,
btrfs_update_inode_bytes(inode, size, drop_args.bytes_found);
ret = btrfs_update_inode(trans, inode);
- if (ret && ret != -ENOSPC) {
+ if (unlikely(ret && ret != -ENOSPC)) {
btrfs_abort_transaction(trans, ret);
goto out;
} else if (ret == -ENOSPC) {
@@ -851,6 +854,8 @@ static void compress_file_range(struct btrfs_work *work)
struct btrfs_inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct address_space *mapping = inode->vfs_inode.i_mapping;
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
+ const u32 min_folio_size = btrfs_min_folio_size(fs_info);
u64 blocksize = fs_info->sectorsize;
u64 start = async_chunk->start;
u64 end = async_chunk->end;
@@ -861,7 +866,7 @@ static void compress_file_range(struct btrfs_work *work)
unsigned long nr_folios;
unsigned long total_compressed = 0;
unsigned long total_in = 0;
- unsigned int poff;
+ unsigned int loff;
int i;
int compress_type = fs_info->compress_type;
int compress_level = fs_info->compress_level;
@@ -899,8 +904,8 @@ static void compress_file_range(struct btrfs_work *work)
actual_end = min_t(u64, i_size, end + 1);
again:
folios = NULL;
- nr_folios = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
- nr_folios = min_t(unsigned long, nr_folios, BTRFS_MAX_COMPRESSED_PAGES);
+ nr_folios = (end >> min_folio_shift) - (start >> min_folio_shift) + 1;
+ nr_folios = min_t(unsigned long, nr_folios, BTRFS_MAX_COMPRESSED >> min_folio_shift);
/*
* we don't want to send crud past the end of i_size through
@@ -956,18 +961,18 @@ again:
/* Compression level is applied here. */
ret = btrfs_compress_folios(compress_type, compress_level,
- mapping, start, folios, &nr_folios, &total_in,
+ inode, start, folios, &nr_folios, &total_in,
&total_compressed);
if (ret)
goto mark_incompressible;
/*
- * Zero the tail end of the last page, as we might be sending it down
+ * Zero the tail end of the last folio, as we might be sending it down
* to disk.
*/
- poff = offset_in_page(total_compressed);
- if (poff)
- folio_zero_range(folios[nr_folios - 1], poff, PAGE_SIZE - poff);
+ loff = (total_compressed & (min_folio_size - 1));
+ if (loff)
+ folio_zero_range(folios[nr_folios - 1], loff, min_folio_size - loff);
/*
* Try to create an inline extent.
@@ -1245,18 +1250,18 @@ u64 btrfs_get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
* locked_folio is the folio that writepage had locked already. We use
* it to make sure we don't do extra locks or unlocks.
*
- * When this function fails, it unlocks all pages except @locked_folio.
+ * When this function fails, it unlocks all folios except @locked_folio.
*
* When this function successfully creates an inline extent, it returns 1 and
- * unlocks all pages including locked_folio and starts I/O on them.
- * (In reality inline extents are limited to a single page, so locked_folio is
- * the only page handled anyway).
+ * unlocks all folios including locked_folio and starts I/O on them.
+ * (In reality inline extents are limited to a single block, so locked_folio is
+ * the only folio handled anyway).
*
- * When this function succeed and creates a normal extent, the page locking
+ * When this function succeeds and creates a normal extent, the folio locking
* status depends on the passed in flags:
*
- * - If @keep_locked is set, all pages are kept locked.
- * - Else all pages except for @locked_folio are unlocked.
+ * - If COW_FILE_RANGE_KEEP_LOCKED flag is set, all folios are kept locked.
+ * - Else all folios except for @locked_folio are unlocked.
*
* When a failure happens in the second or later iteration of the
* while-loop, the ordered extents created in previous iterations are cleaned up.
@@ -1264,7 +1269,7 @@ u64 btrfs_get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
static noinline int cow_file_range(struct btrfs_inode *inode,
struct folio *locked_folio, u64 start,
u64 end, u64 *done_offset,
- bool keep_locked, bool no_inline)
+ unsigned long flags)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1292,7 +1297,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
- if (!no_inline) {
+ if (!(flags & COW_FILE_RANGE_NO_INLINE)) {
/* lets try to make an inline extent */
ret = cow_file_range_inline(inode, locked_folio, start, end, 0,
BTRFS_COMPRESS_NONE, NULL, false);
@@ -1320,7 +1325,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
* Do set the Ordered (Private2) bit so we know this page was properly
* setup for writepage.
*/
- page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
+ page_ops = ((flags & COW_FILE_RANGE_KEEP_LOCKED) ? 0 : PAGE_UNLOCK);
page_ops |= PAGE_SET_ORDERED;
/*
@@ -1531,10 +1536,11 @@ out_unlock:
btrfs_qgroup_free_data(inode, NULL, start + cur_alloc_size,
end - start - cur_alloc_size + 1, NULL);
}
- btrfs_err_rl(fs_info,
- "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
- __func__, btrfs_root_id(inode->root),
- btrfs_ino(inode), orig_start, end + 1 - orig_start, ret);
+ btrfs_err(fs_info,
+"%s failed, root=%llu inode=%llu start=%llu len=%llu cur_offset=%llu cur_alloc_size=%llu: %d",
+ __func__, btrfs_root_id(inode->root),
+ btrfs_ino(inode), orig_start, end + 1 - orig_start,
+ start, cur_alloc_size, ret);
return ret;
}
@@ -1687,7 +1693,7 @@ static noinline int run_delalloc_cow(struct btrfs_inode *inode,
while (start <= end) {
ret = cow_file_range(inode, locked_folio, start, end,
- &done_offset, true, false);
+ &done_offset, COW_FILE_RANGE_KEEP_LOCKED);
if (ret)
return ret;
extent_write_locked_range(&inode->vfs_inode, locked_folio,
@@ -1768,9 +1774,15 @@ static int fallback_to_cow(struct btrfs_inode *inode,
* Don't try to create inline extents, as a mix of inline extent that
* is written out and unlocked directly and a normal NOCOW extent
* doesn't work.
+ *
+ * And here we do not unlock the folio after a successful run.
+ * The folios will be unlocked after everything is finished, or by error handling.
+ *
+ * This is to ensure error handling won't need to clear dirty/ordered flags without
+ * a locked folio, which can race with writeback.
*/
- ret = cow_file_range(inode, locked_folio, start, end, NULL, false,
- true);
+ ret = cow_file_range(inode, locked_folio, start, end, NULL,
+ COW_FILE_RANGE_NO_INLINE | COW_FILE_RANGE_KEEP_LOCKED);
ASSERT(ret != 1);
return ret;
}
@@ -1913,61 +1925,14 @@ static int can_nocow_file_extent(struct btrfs_path *path,
return ret < 0 ? ret : can_nocow;
}
-/*
- * Cleanup the dirty folios which will never be submitted due to error.
- *
- * When running a delalloc range, we may need to split the ranges (due to
- * fragmentation or NOCOW). If we hit an error in the later part, we will error
- * out and previously successfully executed range will never be submitted, thus
- * we have to cleanup those folios by clearing their dirty flag, starting and
- * finishing the writeback.
- */
-static void cleanup_dirty_folios(struct btrfs_inode *inode,
- struct folio *locked_folio,
- u64 start, u64 end, int error)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct address_space *mapping = inode->vfs_inode.i_mapping;
- pgoff_t start_index = start >> PAGE_SHIFT;
- pgoff_t end_index = end >> PAGE_SHIFT;
- u32 len;
-
- ASSERT(end + 1 - start < U32_MAX);
- ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
- IS_ALIGNED(end + 1, fs_info->sectorsize));
- len = end + 1 - start;
-
- /*
- * Handle the locked folio first.
- * The btrfs_folio_clamp_*() helpers can handle range out of the folio case.
- */
- btrfs_folio_clamp_finish_io(fs_info, locked_folio, start, len);
-
- for (pgoff_t index = start_index; index <= end_index; index++) {
- struct folio *folio;
-
- /* Already handled at the beginning. */
- if (index == locked_folio->index)
- continue;
- folio = __filemap_get_folio(mapping, index, FGP_LOCK, GFP_NOFS);
- /* Cache already dropped, no need to do any cleanup. */
- if (IS_ERR(folio))
- continue;
- btrfs_folio_clamp_finish_io(fs_info, locked_folio, start, len);
- folio_unlock(folio);
- folio_put(folio);
- }
- mapping_set_error(mapping, error);
-}
-
static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio,
struct extent_state **cached,
struct can_nocow_file_extent_args *nocow_args,
u64 file_pos, bool is_prealloc)
{
struct btrfs_ordered_extent *ordered;
- u64 len = nocow_args->file_extent.num_bytes;
- u64 end = file_pos + len - 1;
+ const u64 len = nocow_args->file_extent.num_bytes;
+ const u64 end = file_pos + len - 1;
int ret = 0;
btrfs_lock_extent(&inode->io_tree, file_pos, end, cached);
@@ -1978,8 +1943,8 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio
em = btrfs_create_io_em(inode, file_pos, &nocow_args->file_extent,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
- btrfs_unlock_extent(&inode->io_tree, file_pos, end, cached);
- return PTR_ERR(em);
+ ret = PTR_ERR(em);
+ goto error;
}
btrfs_free_extent_map(em);
}
@@ -1991,8 +1956,8 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio
if (IS_ERR(ordered)) {
if (is_prealloc)
btrfs_drop_extent_map_range(inode, file_pos, end, false);
- btrfs_unlock_extent(&inode->io_tree, file_pos, end, cached);
- return PTR_ERR(ordered);
+ ret = PTR_ERR(ordered);
+ goto error;
}
if (btrfs_is_data_reloc_root(inode->root))
@@ -2004,23 +1969,30 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio
ret = btrfs_reloc_clone_csums(ordered);
btrfs_put_ordered_extent(ordered);
+ if (ret < 0)
+ goto error;
extent_clear_unlock_delalloc(inode, file_pos, end, locked_folio, cached,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_CLEAR_DATA_RESV,
- PAGE_UNLOCK | PAGE_SET_ORDERED);
- /*
- * On error, we need to cleanup the ordered extents we created.
- *
- * We do not clear the folio Dirty flags because they are set and
- * cleaered by the caller.
- */
- if (ret < 0)
- btrfs_cleanup_ordered_extents(inode, file_pos, len);
+ PAGE_SET_ORDERED);
+ return ret;
+
+error:
+ btrfs_cleanup_ordered_extents(inode, file_pos, len);
+ extent_clear_unlock_delalloc(inode, file_pos, end, locked_folio, cached,
+ EXTENT_LOCKED | EXTENT_DELALLOC |
+ EXTENT_CLEAR_DATA_RESV,
+ PAGE_UNLOCK | PAGE_START_WRITEBACK |
+ PAGE_END_WRITEBACK);
+ btrfs_err(inode->root->fs_info,
+ "%s failed, root=%lld inode=%llu start=%llu len=%llu: %d",
+ __func__, btrfs_root_id(inode->root), btrfs_ino(inode),
+ file_pos, len, ret);
return ret;
}
/*
- * when nowcow writeback call back. This checks for snapshots or COW copies
+ * Called back by nocow writeback. This checks for snapshots or COW copies
* of the extents that exist in the file, and COWs the file as required.
*
* If no cow copies or snapshots exist, we write directly to the existing
@@ -2037,13 +2009,23 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
/*
* If not 0, represents the inclusive end of the last fallback_to_cow()
* range. Only for error handling.
+ *
+ * The same goes for nocow_end: it avoids doing a second cleanup of the range
+ * already cleaned up by nocow_one_range().
*/
u64 cow_end = 0;
+ u64 nocow_end = 0;
u64 cur_offset = start;
int ret;
bool check_prev = true;
u64 ino = btrfs_ino(inode);
struct can_nocow_file_extent_args nocow_args = { 0 };
+ /* The range that has ordered extent(s). */
+ u64 oe_cleanup_start;
+ u64 oe_cleanup_len = 0;
+ /* The range that is untouched. */
+ u64 untouched_start;
+ u64 untouched_len = 0;
/*
* Normally on a zoned device we're only doing COW writes, but in case
@@ -2207,8 +2189,10 @@ must_cow:
&nocow_args, cur_offset,
extent_type == BTRFS_FILE_EXTENT_PREALLOC);
btrfs_dec_nocow_writers(nocow_bg);
- if (ret < 0)
+ if (ret < 0) {
+ nocow_end = cur_offset + nocow_args.file_extent.num_bytes - 1;
goto error;
+ }
cur_offset = extent_end;
}
btrfs_release_path(path);
@@ -2225,86 +2209,105 @@ must_cow:
cow_start = (u64)-1;
}
- btrfs_free_path(path);
- return 0;
-
-error:
/*
- * There are several error cases:
- *
- * 1) Failed without falling back to COW
- * start cur_offset end
- * |/////////////| |
- *
- * In this case, cow_start should be (u64)-1.
+ * Everything is finished without an error, can unlock the folios now.
*
- * For range [start, cur_offset) the folios are already unlocked (except
- * @locked_folio), EXTENT_DELALLOC already removed.
- * Need to clear the dirty flags and finish the ordered extents.
- *
- * 2) Failed with error before calling fallback_to_cow()
- *
- * start cow_start end
- * |/////////////| |
- *
- * In this case, only @cow_start is set, @cur_offset is between
- * [cow_start, end)
- *
- * It's mostly the same as case 1), just replace @cur_offset with
- * @cow_start.
- *
- * 3) Failed with error from fallback_to_cow()
- *
- * start cow_start cow_end end
- * |/////////////|-----------| |
- *
- * In this case, both @cow_start and @cow_end is set.
- *
- * For range [start, cow_start) it's the same as case 1).
- * But for range [cow_start, cow_end), all the cleanup is handled by
- * cow_file_range(), we should not touch anything in that range.
- *
- * So for all above cases, if @cow_start is set, cleanup ordered extents
- * for range [start, @cow_start), other wise cleanup range [start, @cur_offset).
+ * No need to touch the io tree range nor set folio ordered flag, as
+ * fallback_to_cow() and nocow_one_range() have already handled them.
*/
- if (cow_start != (u64)-1)
- cur_offset = cow_start;
+ extent_clear_unlock_delalloc(inode, start, end, locked_folio, NULL, 0, PAGE_UNLOCK);
- if (cur_offset > start) {
- btrfs_cleanup_ordered_extents(inode, start, cur_offset - start);
- cleanup_dirty_folios(inode, locked_folio, start, cur_offset - 1, ret);
- }
+ btrfs_free_path(path);
+ return 0;
- /*
- * If an error happened while a COW region is outstanding, cur_offset
- * needs to be reset to @cow_end + 1 to skip the COW range, as
- * cow_file_range() will do the proper cleanup at error.
- */
- if (cow_end)
- cur_offset = cow_end + 1;
+error:
+ if (cow_start == (u64)-1) {
+ /*
+ * case a)
+ * start cur_offset end
+ * | OE cleanup | Untouched |
+ *
+ * We finished a fallback_to_cow() or nocow_one_range() call,
+ * but failed to check the next range.
+ *
+ * or
+ * start cur_offset nocow_end end
+ * | OE cleanup | Skip | Untouched |
+ *
+ * nocow_one_range() failed, the range [cur_offset, nocow_end] is
+ * already cleaned up.
+ */
+ oe_cleanup_start = start;
+ oe_cleanup_len = cur_offset - start;
+ if (nocow_end)
+ untouched_start = nocow_end + 1;
+ else
+ untouched_start = cur_offset;
+ untouched_len = end + 1 - untouched_start;
+ } else if (cow_start != (u64)-1 && cow_end == 0) {
+ /*
+ * case b)
+ * start cow_start cur_offset end
+ * | OE cleanup | Untouched |
+ *
+ * We got a range that needs COW, but hit an error before reaching the next
+ * NOCOW range, thus [cow_start, cur_offset) doesn't have any OE yet.
+ */
+ oe_cleanup_start = start;
+ oe_cleanup_len = cow_start - start;
+ untouched_start = cow_start;
+ untouched_len = end + 1 - untouched_start;
+ } else {
+ /*
+ * case c)
+ * start cow_start cow_end end
+ * | OE cleanup | Skip | Untouched |
+ *
+ * fallback_to_cow() failed, and fallback_to_cow() will do the
+ * cleanup for its range, we shouldn't touch the range
+ * [cow_start, cow_end].
+ */
+ ASSERT(cow_start != (u64)-1 && cow_end != 0);
+ oe_cleanup_start = start;
+ oe_cleanup_len = cow_start - start;
+ untouched_start = cow_end + 1;
+ untouched_len = end + 1 - untouched_start;
+ }
+
+ if (oe_cleanup_len) {
+ const u64 oe_cleanup_end = oe_cleanup_start + oe_cleanup_len - 1;
+ btrfs_cleanup_ordered_extents(inode, oe_cleanup_start, oe_cleanup_len);
+ extent_clear_unlock_delalloc(inode, oe_cleanup_start, oe_cleanup_end,
+ locked_folio, NULL,
+ EXTENT_LOCKED | EXTENT_DELALLOC,
+ PAGE_UNLOCK | PAGE_START_WRITEBACK |
+ PAGE_END_WRITEBACK);
+ }
- /*
- * We need to lock the extent here because we're clearing DELALLOC and
- * we're not locked at this point.
- */
- if (cur_offset < end) {
+ if (untouched_len) {
struct extent_state *cached = NULL;
+ const u64 untouched_end = untouched_start + untouched_len - 1;
- btrfs_lock_extent(&inode->io_tree, cur_offset, end, &cached);
- extent_clear_unlock_delalloc(inode, cur_offset, end,
+ /*
+ * We need to lock the extent here because we're clearing DELALLOC and
+ * we're not locked at this point.
+ */
+ btrfs_lock_extent(&inode->io_tree, untouched_start, untouched_end, &cached);
+ extent_clear_unlock_delalloc(inode, untouched_start, untouched_end,
locked_folio, &cached,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
- btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
+ btrfs_qgroup_free_data(inode, NULL, untouched_start, untouched_len, NULL);
}
btrfs_free_path(path);
- btrfs_err_rl(fs_info,
- "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
- __func__, btrfs_root_id(inode->root),
- btrfs_ino(inode), start, end + 1 - start, ret);
+ btrfs_err(fs_info,
+"%s failed, root=%llu inode=%llu start=%llu len=%llu cur_offset=%llu oe_cleanup=%llu oe_cleanup_len=%llu untouched_start=%llu untouched_len=%llu: %d",
+ __func__, btrfs_root_id(inode->root), btrfs_ino(inode),
+ start, end + 1 - start, cur_offset, oe_cleanup_start, oe_cleanup_len,
+ untouched_start, untouched_len, ret);
return ret;
}
@@ -2349,8 +2352,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol
ret = run_delalloc_cow(inode, locked_folio, start, end, wbc,
true);
else
- ret = cow_file_range(inode, locked_folio, start, end, NULL,
- false, false);
+ ret = cow_file_range(inode, locked_folio, start, end, NULL, 0);
return ret;
}
@@ -2986,7 +2988,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
* If we dropped an inline extent here, we know the range where it is
* was not marked with the EXTENT_DELALLOC_NEW bit, so we update the
* number of bytes only for that range containing the inline extent.
- * The remaining of the range will be processed when clearning the
+ * The remaining of the range will be processed when clearing the
* EXTENT_DELALLOC_BIT bit through the ordered extent completion.
*/
if (file_pos == 0 && !IS_ALIGNED(drop_args.bytes_found, sectorsize)) {
@@ -3102,14 +3104,15 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
if (!freespace_inode)
btrfs_lockdep_acquire(fs_info, btrfs_ordered_extent);
- if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
+ if (unlikely(test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags))) {
ret = -EIO;
goto out;
}
- if (btrfs_is_zoned(fs_info))
- btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
- ordered_extent->disk_num_bytes);
+ ret = btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+ ordered_extent->disk_num_bytes);
+ if (ret)
+ goto out;
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
@@ -3147,7 +3150,7 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
trans->block_rsv = &inode->block_rsv;
ret = btrfs_insert_raid_extent(trans, ordered_extent);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3155,7 +3158,7 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
/* Logic error */
ASSERT(list_empty(&ordered_extent->list));
- if (!list_empty(&ordered_extent->list)) {
+ if (unlikely(!list_empty(&ordered_extent->list))) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3163,7 +3166,7 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
btrfs_inode_safe_disk_i_size_write(inode, 0);
ret = btrfs_update_inode_fallback(trans, inode);
- if (ret) {
+ if (unlikely(ret)) {
/* -ENOMEM or corruption */
btrfs_abort_transaction(trans, ret);
}
@@ -3190,20 +3193,20 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->disk_num_bytes);
}
}
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_unpin_extent_cache(inode, ordered_extent->file_offset,
ordered_extent->num_bytes, trans->transid);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = add_pending_csums(trans, &ordered_extent->list);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3221,7 +3224,7 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
btrfs_inode_safe_disk_i_size_write(inode, 0);
ret = btrfs_update_inode_fallback(trans, inode);
- if (ret) { /* -ENOMEM or corruption */
+ if (unlikely(ret)) { /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3327,21 +3330,60 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
return btrfs_finish_one_ordered(ordered);
}
+/*
+ * Calculate the checksum of the block at physical address @paddr and store
+ * it in @dest.
+ *
+ * The block is fs_info->sectorsize bytes long and must be fully inside one
+ * folio. If folio_test_partial_kmap() is true the block is mapped and hashed
+ * in pieces that never cross a page boundary, otherwise it is hashed in one
+ * call through phys_to_virt().
+ */
+void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr,
+ u8 *dest)
+{
+ struct folio *folio = page_folio(phys_to_page(paddr));
+ const u32 blocksize = fs_info->sectorsize;
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+
+ shash->tfm = fs_info->csum_shash;
+ /* The full block must be inside the folio. */
+ ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio));
+
+ if (folio_test_partial_kmap(folio)) {
+ /*
+ * Must be phys_addr_t: it can be wider than size_t (e.g. 32-bit
+ * with PAE), and a truncated cursor would never reach the block end.
+ */
+ phys_addr_t cur = paddr;
+
+ crypto_shash_init(shash);
+ while (cur < paddr + blocksize) {
+ void *kaddr;
+ size_t len = min(paddr + blocksize - cur,
+ PAGE_SIZE - offset_in_page(cur));
+
+ kaddr = kmap_local_folio(folio, offset_in_folio(folio, cur));
+ crypto_shash_update(shash, kaddr, len);
+ kunmap_local(kaddr);
+ cur += len;
+ }
+ crypto_shash_final(shash, dest);
+ } else {
+ crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, dest);
+ }
+}
/*
* Verify the checksum for a single sector without any extra action that depend
* on the type of I/O.
*
- * @kaddr must be a properly kmapped address.
+ * @paddr is the physical address of the block to verify.
*/
-int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum,
- const u8 * const csum_expected)
+int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
+ const u8 * const csum_expected)
{
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
-
- shash->tfm = fs_info->csum_shash;
- crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
-
- if (memcmp(csum, csum_expected, fs_info->csum_size))
+ btrfs_calculate_block_csum(fs_info, paddr, csum);
+ if (unlikely(memcmp(csum, csum_expected, fs_info->csum_size) != 0))
return -EIO;
return 0;
}
@@ -3360,17 +3389,16 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum
* Return %true if the sector is ok or had no checksum to start with, else %false.
*/
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
- u32 bio_offset, struct bio_vec *bv)
+ u32 bio_offset, phys_addr_t paddr)
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ const u32 blocksize = fs_info->sectorsize;
+ struct folio *folio;
u64 file_offset = bbio->file_offset + bio_offset;
- u64 end = file_offset + bv->bv_len - 1;
+ u64 end = file_offset + blocksize - 1;
u8 *csum_expected;
u8 csum[BTRFS_CSUM_SIZE];
- void *kaddr;
-
- ASSERT(bv->bv_len == fs_info->sectorsize);
if (!bbio->csum)
return true;
@@ -3386,12 +3414,8 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
csum_expected = bbio->csum + (bio_offset >> fs_info->sectorsize_bits) *
fs_info->csum_size;
- kaddr = bvec_kmap_local(bv);
- if (btrfs_check_sector_csum(fs_info, kaddr, csum, csum_expected)) {
- kunmap_local(kaddr);
+ if (btrfs_check_block_csum(fs_info, paddr, csum, csum_expected))
goto zeroit;
- }
- kunmap_local(kaddr);
return true;
zeroit:
@@ -3399,7 +3423,9 @@ zeroit:
bbio->mirror_num);
if (dev)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
- memzero_bvec(bv);
+ folio = page_folio(phys_to_page(paddr));
+ ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio));
+ folio_zero_range(folio, offset_in_folio(folio, paddr), blocksize);
return false;
}
@@ -3513,7 +3539,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
int ret;
ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
- if (ret && ret != -EEXIST) {
+ if (unlikely(ret && ret != -EEXIST)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -3885,10 +3911,6 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
bool filled = false;
int first_xattr_slot;
- ret = btrfs_init_file_extent_tree(inode);
- if (ret)
- goto out;
-
ret = btrfs_fill_inode(inode, &rdev);
if (!ret)
filled = true;
@@ -3920,8 +3942,6 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
i_uid_write(vfs_inode, btrfs_inode_uid(leaf, inode_item));
i_gid_write(vfs_inode, btrfs_inode_gid(leaf, inode_item));
btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
- btrfs_inode_set_file_extent_range(inode, 0,
- round_up(i_size_read(vfs_inode), fs_info->sectorsize));
inode_set_atime(vfs_inode, btrfs_timespec_sec(leaf, &inode_item->atime),
btrfs_timespec_nsec(leaf, &inode_item->atime));
@@ -3953,6 +3973,11 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
btrfs_set_inode_mapping_order(inode);
cache_index:
+ ret = btrfs_init_file_extent_tree(inode);
+ if (ret)
+ goto out;
+ btrfs_inode_set_file_extent_range(inode, 0,
+ round_up(i_size_read(vfs_inode), fs_info->sectorsize));
/*
* If we were modified in the current generation and evicted from memory
* and then re-read we need to do a full sync since we don't have any
@@ -4263,7 +4288,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
}
ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_crit(fs_info,
"failed to delete reference to %.*s, root %llu inode %llu parent %llu",
name->len, name->name, btrfs_root_id(root), ino, dir_ino);
@@ -4275,7 +4300,7 @@ skip_backref:
rename_ctx->index = index;
ret = btrfs_delete_delayed_dir_index(trans, dir, index);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -4430,7 +4455,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
btrfs_dir_item_key_to_cpu(leaf, di, &key);
WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
ret = btrfs_delete_one_dir_name(trans, root, path, di);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -4461,14 +4486,14 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
ret = btrfs_del_root_ref(trans, objectid,
btrfs_root_id(root), dir_ino,
&index, &fname.disk_name);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = btrfs_delete_delayed_dir_index(trans, dir, index);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -4526,7 +4551,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0)
return ret;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist a root
* with such id, but this is out of valid range.
@@ -4557,7 +4582,7 @@ static void btrfs_prune_dentries(struct btrfs_root *root)
inode = btrfs_find_first_inode(root, min_ino);
while (inode) {
- if (atomic_read(&inode->vfs_inode.i_count) > 1)
+ if (icount_read(&inode->vfs_inode) > 1)
d_prune_aliases(&inode->vfs_inode);
min_ino = btrfs_ino(inode) + 1;
@@ -4640,13 +4665,13 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
btrfs_record_snapshot_destroy(trans, dir);
ret = btrfs_unlink_subvol(trans, dir, dentry);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
ret = btrfs_record_root_in_trans(trans, dest);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -4660,7 +4685,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
ret = btrfs_insert_orphan_item(trans,
fs_info->tree_root,
btrfs_root_id(dest));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -4668,7 +4693,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
BTRFS_UUID_KEY_SUBVOL, btrfs_root_id(dest));
- if (ret && ret != -ENOENT) {
+ if (unlikely(ret && ret != -ENOENT)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -4677,7 +4702,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
dest->root_item.received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
btrfs_root_id(dest));
- if (ret && ret != -ENOENT) {
+ if (unlikely(ret && ret != -ENOENT)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -4817,7 +4842,7 @@ again:
folio_put(folio);
goto again;
}
- if (!folio_test_uptodate(folio)) {
+ if (unlikely(!folio_test_uptodate(folio))) {
ret = -EIO;
goto out_unlock;
}
@@ -4905,7 +4930,7 @@ int btrfs_truncate_block(struct btrfs_inode *inode, u64 offset, u64 start, u64 e
goto out;
/*
- * Skip the truncatioin if the range in the target block is already aligned.
+ * Skip the truncation if the range in the target block is already aligned.
* The seemingly complex check will also handle the same block case.
*/
if (in_head_block && !IS_ALIGNED(start, blocksize))
@@ -4961,7 +4986,7 @@ again:
folio_put(folio);
goto again;
}
- if (!folio_test_uptodate(folio)) {
+ if (unlikely(!folio_test_uptodate(folio))) {
ret = -EIO;
goto out_unlock;
}
@@ -5081,7 +5106,7 @@ static int maybe_insert_hole(struct btrfs_inode *inode, u64 offset, u64 len)
drop_args.drop_cache = true;
ret = btrfs_drop_extents(trans, root, inode, &drop_args);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@@ -5601,8 +5626,8 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry,
}
btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
- if (location->type != BTRFS_INODE_ITEM_KEY &&
- location->type != BTRFS_ROOT_ITEM_KEY) {
+ if (unlikely(location->type != BTRFS_INODE_ITEM_KEY &&
+ location->type != BTRFS_ROOT_ITEM_KEY)) {
ret = -EUCLEAN;
btrfs_warn(root->fs_info,
"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
@@ -5696,7 +5721,17 @@ static void btrfs_del_inode_from_root(struct btrfs_inode *inode)
bool empty = false;
xa_lock(&root->inodes);
- entry = __xa_erase(&root->inodes, btrfs_ino(inode));
+ /*
+ * This btrfs_inode is being freed and has already been unhashed at this
+ * point. It's possible that another btrfs_inode has already been
+ * allocated for the same inode and inserted itself into the root, so
+ * don't delete it in that case.
+ *
+ * Note that this shouldn't need to allocate memory, so the gfp flags
+ * don't really matter.
+ */
+ entry = __xa_cmpxchg(&root->inodes, btrfs_ino(inode), inode, NULL,
+ GFP_ATOMIC);
if (entry == inode)
empty = xa_empty(&root->inodes);
xa_unlock(&root->inodes);
@@ -5883,7 +5918,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
return ERR_CAST(inode);
/* Do extra check against inode mode with di_type */
- if (btrfs_inode_type(inode) != di_type) {
+ if (unlikely(btrfs_inode_type(inode) != di_type)) {
btrfs_crit(fs_info,
"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
inode->vfs_inode.i_mode, btrfs_inode_type(inode),
@@ -6470,6 +6505,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
if (!args->subvol)
btrfs_inherit_iflags(BTRFS_I(inode), BTRFS_I(dir));
+ btrfs_set_inode_mapping_order(BTRFS_I(inode));
if (S_ISREG(inode->i_mode)) {
if (btrfs_test_opt(fs_info, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
@@ -6477,7 +6513,6 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
BTRFS_INODE_NODATASUM;
btrfs_update_inode_mapping_flags(BTRFS_I(inode));
- btrfs_set_inode_mapping_order(BTRFS_I(inode));
}
ret = btrfs_insert_inode_locked(inode);
@@ -6524,7 +6559,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
batch.total_data_size = sizes[0] + (args->orphan ? 0 : sizes[1]);
batch.nr = args->orphan ? 1 : 2;
ret = btrfs_insert_empty_items(trans, root, path, &batch);
- if (ret != 0) {
+ if (unlikely(ret != 0)) {
btrfs_abort_transaction(trans, ret);
goto discard;
}
@@ -6601,7 +6636,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
*/
if (!args->subvol) {
ret = btrfs_init_inode_security(trans, args);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto discard;
}
@@ -6621,14 +6656,14 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
if (args->orphan) {
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto discard;
}
} else {
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name,
0, BTRFS_I(inode)->dir_index);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto discard;
}
@@ -6659,7 +6694,7 @@ out:
*/
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
- const struct fscrypt_str *name, int add_backref, u64 index)
+ const struct fscrypt_str *name, bool add_backref, u64 index)
{
int ret = 0;
struct btrfs_key key;
@@ -6692,7 +6727,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
btrfs_inode_type(inode), index);
if (ret == -EEXIST || ret == -EOVERFLOW)
goto fail_dir_item;
- else if (ret) {
+ else if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -6848,7 +6883,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
/* Link added now we update the inode item with the new link count. */
inc_nlink(inode);
ret = btrfs_update_inode(trans, BTRFS_I(inode));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -6859,7 +6894,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
* open(2) O_TMPFILE flag.
*/
ret = btrfs_orphan_del(trans, BTRFS_I(inode));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -7067,7 +7102,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
if (extent_type == BTRFS_FILE_EXTENT_REG ||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
/* Only regular file could have regular/prealloc extent */
- if (!S_ISREG(inode->vfs_inode.i_mode)) {
+ if (unlikely(!S_ISREG(inode->vfs_inode.i_mode))) {
ret = -EUCLEAN;
btrfs_crit(fs_info,
"regular/prealloc extent found for non-regular inode %llu",
@@ -7144,7 +7179,7 @@ not_found:
insert:
ret = 0;
btrfs_release_path(path);
- if (em->start > start || btrfs_extent_map_end(em) <= start) {
+ if (unlikely(em->start > start || btrfs_extent_map_end(em) <= start)) {
btrfs_err(fs_info,
"bad extent! em: [%llu %llu] passed [%llu %llu]",
em->start, em->len, start, len);
@@ -7964,7 +7999,7 @@ int btrfs_drop_inode(struct inode *inode)
if (btrfs_root_refs(&root->root_item) == 0)
return 1;
else
- return generic_drop_inode(inode);
+ return inode_generic_drop(inode);
}
static void init_once(void *foo)
@@ -7972,6 +8007,9 @@ static void init_once(void *foo)
struct btrfs_inode *ei = foo;
inode_init_once(&ei->vfs_inode);
+#ifdef CONFIG_FS_VERITY
+ ei->i_verity_info = NULL;
+#endif
}
void __cold btrfs_destroy_cachep(void)
@@ -8173,7 +8211,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
btrfs_ino(BTRFS_I(old_dir)),
new_idx);
if (ret) {
- if (need_abort)
+ if (unlikely(need_abort))
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8221,7 +8259,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8229,12 +8267,12 @@ static int btrfs_rename_exchange(struct inode *old_dir,
ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
BTRFS_I(old_dentry->d_inode),
old_name, &old_rename_ctx);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
ret = btrfs_update_inode(trans, BTRFS_I(old_inode));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8243,7 +8281,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* dest is a subvolume */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8251,12 +8289,12 @@ static int btrfs_rename_exchange(struct inode *old_dir,
ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
BTRFS_I(new_dentry->d_inode),
new_name, &new_rename_ctx);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
ret = btrfs_update_inode(trans, BTRFS_I(new_inode));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8264,14 +8302,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
new_name, 0, old_idx);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
old_name, 0, new_idx);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8512,7 +8550,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8520,12 +8558,12 @@ static int btrfs_rename(struct mnt_idmap *idmap,
ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
BTRFS_I(d_inode(old_dentry)),
&old_fname.disk_name, &rename_ctx);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
ret = btrfs_update_inode(trans, BTRFS_I(old_inode));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8536,7 +8574,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8545,7 +8583,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir),
BTRFS_I(d_inode(new_dentry)),
&new_fname.disk_name);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8553,7 +8591,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (new_inode->i_nlink == 0) {
ret = btrfs_orphan_add(trans,
BTRFS_I(d_inode(new_dentry)));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8562,7 +8600,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
&new_fname.disk_name, 0, index);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -8576,7 +8614,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (flags & RENAME_WHITEOUT) {
ret = btrfs_create_new_inode(trans, &whiteout_args);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
} else {
@@ -8870,7 +8908,7 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
goto out;
path = btrfs_alloc_path();
- if (!path) {
+ if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
discard_new_inode(inode);
@@ -8882,7 +8920,7 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
key.offset = 0;
datasize = btrfs_file_extent_calc_inline_size(name_len);
ret = btrfs_insert_empty_item(trans, root, path, &key, datasize);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_free_path(path);
discard_new_inode(inode);
@@ -9095,7 +9133,7 @@ next:
ret = btrfs_update_inode(trans, BTRFS_I(inode));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
if (own_trans)
btrfs_end_transaction(trans);
@@ -9263,7 +9301,7 @@ static ssize_t btrfs_encoded_read_inline(
ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
extent_start, 0);
if (ret) {
- if (ret > 0) {
+ if (unlikely(ret > 0)) {
/* The extent item disappeared? */
return -EIO;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7e13de2bdcbf..a454b5ba2097 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -376,13 +376,13 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
if (comp) {
ret = btrfs_set_prop(trans, inode, "btrfs.compression",
comp, strlen(comp), 0);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
} else {
ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, 0, 0);
- if (ret && ret != -ENODATA) {
+ if (unlikely(ret && ret != -ENODATA)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -633,7 +633,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
btrfs_clear_buffer_dirty(trans, leaf);
btrfs_tree_unlock(leaf);
ret2 = btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
- if (ret2 < 0)
+ if (unlikely(ret2 < 0))
btrfs_abort_transaction(trans, ret2);
free_extent_buffer(leaf);
goto out;
@@ -654,14 +654,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
/* ... and new_root is owned by new_inode_args.inode now. */
ret = btrfs_record_root_in_trans(trans, new_root);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_uuid_tree_add(trans, root_item->uuid,
BTRFS_UUID_KEY_SUBVOL, objectid);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -669,7 +669,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
btrfs_record_new_subvolume(trans, BTRFS_I(dir));
ret = btrfs_create_new_inode(trans, &new_inode_args);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -957,7 +957,7 @@ static noinline int btrfs_mksnapshot(struct dentry *parent,
/*
* Force new buffered writes to reserve space even when NOCOW is
- * possible. This is to avoid later writeback (running dealloc) to
+ * possible. This is to avoid later writeback (running delalloc) to
* fallback to COW mode and unexpectedly fail with ENOSPC.
*/
btrfs_drew_read_lock(&root->snapshot_lock);
@@ -1251,7 +1251,7 @@ out:
}
static noinline int btrfs_ioctl_snap_create(struct file *file,
- void __user *arg, int subvol)
+ void __user *arg, bool subvol)
{
struct btrfs_ioctl_vol_args *vol_args;
int ret;
@@ -2133,7 +2133,7 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
ret = btrfs_next_leaf(fs_info->tree_root, path);
if (ret < 0) {
goto out;
- } else if (ret > 0) {
+ } else if (unlikely(ret > 0)) {
ret = -EUCLEAN;
goto out;
}
@@ -2216,7 +2216,7 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
ret = btrfs_next_leaf(root, path);
if (ret < 0) {
goto out;
- } else if (ret > 0) {
+ } else if (unlikely(ret > 0)) {
ret = -EUCLEAN;
goto out;
}
@@ -2245,7 +2245,7 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
ret = btrfs_next_item(root, path);
if (ret < 0) {
goto out;
- } else if (ret > 0) {
+ } else if (unlikely(ret > 0)) {
ret = -EUCLEAN;
goto out;
}
@@ -4008,7 +4008,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
btrfs_root_id(root));
- if (ret && ret != -ENOENT) {
+ if (unlikely(ret && ret != -ENOENT)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out;
@@ -4032,7 +4032,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
ret = btrfs_uuid_tree_add(trans, sa->uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
btrfs_root_id(root));
- if (ret < 0 && ret != -EEXIST) {
+ if (unlikely(ret < 0 && ret != -EEXIST)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out;
@@ -4418,6 +4418,10 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
goto out_acct;
}
+ if (fs_info->sectorsize > PAGE_SIZE) {
+ ret = -ENOTTY;
+ goto out_acct;
+ }
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_encoded_io_args_32 args32;
@@ -4509,6 +4513,7 @@ out_acct:
static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat)
{
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode);
struct btrfs_ioctl_encoded_io_args args;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
@@ -4522,6 +4527,11 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
goto out_acct;
}
+ if (fs_info->sectorsize > PAGE_SIZE) {
+ ret = -ENOTTY;
+ goto out_acct;
+ }
+
if (!(file->f_mode & FMODE_WRITE)) {
ret = -EBADF;
goto out_acct;
@@ -4780,14 +4790,14 @@ out_fail:
static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
+ struct file *file = cmd->file;
+ struct btrfs_inode *inode = BTRFS_I(file->f_inode);
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags);
size_t copy_end;
int ret;
u64 disk_bytenr, disk_io_size;
- struct file *file;
- struct btrfs_inode *inode;
- struct btrfs_fs_info *fs_info;
- struct extent_io_tree *io_tree;
loff_t pos;
struct kiocb kiocb;
struct extent_state *cached_state = NULL;
@@ -4803,10 +4813,11 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
ret = -EPERM;
goto out_acct;
}
- file = cmd->file;
- inode = BTRFS_I(file->f_inode);
- fs_info = inode->root->fs_info;
- io_tree = &inode->io_tree;
+ if (fs_info->sectorsize > PAGE_SIZE) {
+ ret = -ENOTTY;
+ goto out_acct;
+ }
+
sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
if (issue_flags & IO_URING_F_COMPAT) {
@@ -4933,9 +4944,10 @@ out_acct:
static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
+ struct file *file = cmd->file;
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode);
loff_t pos;
struct kiocb kiocb;
- struct file *file;
ssize_t ret;
void __user *sqe_addr;
struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(cmd, struct io_btrfs_cmd);
@@ -4948,8 +4960,11 @@ static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issu
ret = -EPERM;
goto out_acct;
}
+ if (fs_info->sectorsize > PAGE_SIZE) {
+ ret = -ENOTTY;
+ goto out_acct;
+ }
- file = cmd->file;
sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
if (!(file->f_mode & FMODE_WRITE)) {
@@ -5223,13 +5238,13 @@ long btrfs_ioctl(struct file *file, unsigned int
case FITRIM:
return btrfs_ioctl_fitrim(fs_info, argp);
case BTRFS_IOC_SNAP_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 0);
+ return btrfs_ioctl_snap_create(file, argp, false);
case BTRFS_IOC_SNAP_CREATE_V2:
- return btrfs_ioctl_snap_create_v2(file, argp, 0);
+ return btrfs_ioctl_snap_create_v2(file, argp, false);
case BTRFS_IOC_SUBVOL_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 1);
+ return btrfs_ioctl_snap_create(file, argp, true);
case BTRFS_IOC_SUBVOL_CREATE_V2:
- return btrfs_ioctl_snap_create_v2(file, argp, 1);
+ return btrfs_ioctl_snap_create_v2(file, argp, true);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp, false);
case BTRFS_IOC_SNAP_DESTROY_V2:
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index a3e6d9616e60..0035851d72b0 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -361,7 +361,7 @@ void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
atomic_inc(&lock->readers);
/*
- * Ensure the pending reader count is perceieved BEFORE this reader
+ * Ensure the pending reader count is perceived BEFORE this reader
* goes to sleep in case of active writers. This guarantees new writers
* won't be allowed and that the current reader will be woken up when
* the last active writer finishes its jobs.
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index af29df98ac14..a4673e7d95d7 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -74,7 +74,7 @@ enum btrfs_lock_nesting {
BTRFS_NESTING_NEW_ROOT,
/*
- * We are limited to MAX_LOCKDEP_SUBLCLASSES number of subclasses, so
+ * We are limited to MAX_LOCKDEP_SUBCLASSES number of subclasses, so
* add this in here and add a static_assert to keep us from going over
* the limit. As of this writing we're limited to 8, and we're
* definitely using 8, hence this check to keep us from messing up in
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index d403641889ca..4758f66da449 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -58,9 +58,6 @@
* 0x1000 | SegHdr N+1| Data payload N+1 ... |
*/
-#define WORKSPACE_BUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE))
-#define WORKSPACE_CBUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE))
-
struct workspace {
void *mem;
void *buf; /* where decompressed data goes */
@@ -68,7 +65,14 @@ struct workspace {
struct list_head list;
};
-static struct workspace_manager wsm;
+static u32 workspace_buf_length(const struct btrfs_fs_info *fs_info)
+{
+ return lzo1x_worst_compress(fs_info->sectorsize);
+}
+static u32 workspace_cbuf_length(const struct btrfs_fs_info *fs_info)
+{
+ return lzo1x_worst_compress(fs_info->sectorsize);
+}
void lzo_free_workspace(struct list_head *ws)
{
@@ -80,7 +84,7 @@ void lzo_free_workspace(struct list_head *ws)
kfree(workspace);
}
-struct list_head *lzo_alloc_workspace(void)
+struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info)
{
struct workspace *workspace;
@@ -89,8 +93,8 @@ struct list_head *lzo_alloc_workspace(void)
return ERR_PTR(-ENOMEM);
workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN);
- workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL | __GFP_NOWARN);
- workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL | __GFP_NOWARN);
+ workspace->buf = kvmalloc(workspace_buf_length(fs_info), GFP_KERNEL | __GFP_NOWARN);
+ workspace->cbuf = kvmalloc(workspace_cbuf_length(fs_info), GFP_KERNEL | __GFP_NOWARN);
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
goto fail;
@@ -128,19 +132,21 @@ static inline size_t read_compress_length(const char *buf)
*
* Will allocate new pages when needed.
*/
-static int copy_compressed_data_to_page(char *compressed_data,
+static int copy_compressed_data_to_page(struct btrfs_fs_info *fs_info,
+ char *compressed_data,
size_t compressed_size,
struct folio **out_folios,
unsigned long max_nr_folio,
- u32 *cur_out,
- const u32 sectorsize)
+ u32 *cur_out)
{
+ const u32 sectorsize = fs_info->sectorsize;
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
u32 sector_bytes_left;
u32 orig_out;
struct folio *cur_folio;
char *kaddr;
- if ((*cur_out / PAGE_SIZE) >= max_nr_folio)
+ if ((*cur_out >> min_folio_shift) >= max_nr_folio)
return -E2BIG;
/*
@@ -149,18 +155,17 @@ static int copy_compressed_data_to_page(char *compressed_data,
*/
ASSERT((*cur_out / sectorsize) == (*cur_out + LZO_LEN - 1) / sectorsize);
- cur_folio = out_folios[*cur_out / PAGE_SIZE];
+ cur_folio = out_folios[*cur_out >> min_folio_shift];
/* Allocate a new page */
if (!cur_folio) {
- cur_folio = btrfs_alloc_compr_folio();
+ cur_folio = btrfs_alloc_compr_folio(fs_info);
if (!cur_folio)
return -ENOMEM;
- out_folios[*cur_out / PAGE_SIZE] = cur_folio;
+ out_folios[*cur_out >> min_folio_shift] = cur_folio;
}
- kaddr = kmap_local_folio(cur_folio, 0);
- write_compress_length(kaddr + offset_in_page(*cur_out),
- compressed_size);
+ kaddr = kmap_local_folio(cur_folio, offset_in_folio(cur_folio, *cur_out));
+ write_compress_length(kaddr, compressed_size);
*cur_out += LZO_LEN;
orig_out = *cur_out;
@@ -172,20 +177,20 @@ static int copy_compressed_data_to_page(char *compressed_data,
kunmap_local(kaddr);
- if ((*cur_out / PAGE_SIZE) >= max_nr_folio)
+ if ((*cur_out >> min_folio_shift) >= max_nr_folio)
return -E2BIG;
- cur_folio = out_folios[*cur_out / PAGE_SIZE];
+ cur_folio = out_folios[*cur_out >> min_folio_shift];
/* Allocate a new page */
if (!cur_folio) {
- cur_folio = btrfs_alloc_compr_folio();
+ cur_folio = btrfs_alloc_compr_folio(fs_info);
if (!cur_folio)
return -ENOMEM;
- out_folios[*cur_out / PAGE_SIZE] = cur_folio;
+ out_folios[*cur_out >> min_folio_shift] = cur_folio;
}
kaddr = kmap_local_folio(cur_folio, 0);
- memcpy(kaddr + offset_in_page(*cur_out),
+ memcpy(kaddr + offset_in_folio(cur_folio, *cur_out),
compressed_data + *cur_out - orig_out, copy_len);
*cur_out += copy_len;
@@ -209,12 +214,15 @@ out:
return 0;
}
-int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
+int lzo_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out)
{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct workspace *workspace = list_entry(ws, struct workspace, list);
- const u32 sectorsize = inode_to_fs_info(mapping->host)->sectorsize;
+ const u32 sectorsize = fs_info->sectorsize;
+ const u32 min_folio_size = btrfs_min_folio_size(fs_info);
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
struct folio *folio_in = NULL;
char *sizes_ptr;
const unsigned long max_nr_folio = *out_folios;
@@ -263,9 +271,9 @@ int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
goto out;
}
- ret = copy_compressed_data_to_page(workspace->cbuf, out_len,
+ ret = copy_compressed_data_to_page(fs_info, workspace->cbuf, out_len,
folios, max_nr_folio,
- &cur_out, sectorsize);
+ &cur_out);
if (ret < 0)
goto out;
@@ -280,8 +288,8 @@ int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
goto out;
}
- /* Check if we have reached page boundary */
- if (PAGE_ALIGNED(cur_in)) {
+ /* Check if we have reached folio boundary. */
+ if (IS_ALIGNED(cur_in, min_folio_size)) {
folio_put(folio_in);
folio_in = NULL;
}
@@ -298,7 +306,7 @@ int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
out:
if (folio_in)
folio_put(folio_in);
- *out_folios = DIV_ROUND_UP(cur_out, PAGE_SIZE);
+ *out_folios = DIV_ROUND_UP(cur_out, min_folio_size);
return ret;
}
@@ -310,15 +318,16 @@ out:
static void copy_compressed_segment(struct compressed_bio *cb,
char *dest, u32 len, u32 *cur_in)
{
+ struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
u32 orig_in = *cur_in;
while (*cur_in < orig_in + len) {
- struct folio *cur_folio;
- u32 copy_len = min_t(u32, PAGE_SIZE - offset_in_page(*cur_in),
- orig_in + len - *cur_in);
+ struct folio *cur_folio = cb->compressed_folios[*cur_in >> min_folio_shift];
+ u32 copy_len = min_t(u32, orig_in + len - *cur_in,
+ folio_size(cur_folio) - offset_in_folio(cur_folio, *cur_in));
ASSERT(copy_len);
- cur_folio = cb->compressed_folios[*cur_in / PAGE_SIZE];
memcpy_from_folio(dest + *cur_in - orig_in, cur_folio,
offset_in_folio(cur_folio, *cur_in), copy_len);
@@ -332,6 +341,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
struct workspace *workspace = list_entry(ws, struct workspace, list);
const struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info;
const u32 sectorsize = fs_info->sectorsize;
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
char *kaddr;
int ret;
/* Compressed data length, can be unaligned */
@@ -378,14 +388,14 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
*/
ASSERT(cur_in / sectorsize ==
(cur_in + LZO_LEN - 1) / sectorsize);
- cur_folio = cb->compressed_folios[cur_in / PAGE_SIZE];
+ cur_folio = cb->compressed_folios[cur_in >> min_folio_shift];
ASSERT(cur_folio);
kaddr = kmap_local_folio(cur_folio, 0);
- seg_len = read_compress_length(kaddr + offset_in_page(cur_in));
+ seg_len = read_compress_length(kaddr + offset_in_folio(cur_folio, cur_in));
kunmap_local(kaddr);
cur_in += LZO_LEN;
- if (unlikely(seg_len > WORKSPACE_CBUF_LENGTH)) {
+ if (unlikely(seg_len > workspace_cbuf_length(fs_info))) {
struct btrfs_inode *inode = cb->bbio.inode;
/*
@@ -445,19 +455,19 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
const u32 sectorsize = fs_info->sectorsize;
size_t in_len;
size_t out_len;
- size_t max_segment_len = WORKSPACE_BUF_LENGTH;
+ size_t max_segment_len = workspace_buf_length(fs_info);
int ret = 0;
- if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)
+ if (unlikely(srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2))
return -EUCLEAN;
in_len = read_compress_length(data_in);
- if (in_len != srclen)
+ if (unlikely(in_len != srclen))
return -EUCLEAN;
data_in += LZO_LEN;
in_len = read_compress_length(data_in);
- if (in_len != srclen - LZO_LEN * 2) {
+ if (unlikely(in_len != srclen - LZO_LEN * 2)) {
ret = -EUCLEAN;
goto out;
}
@@ -487,8 +497,7 @@ out:
return ret;
}
-const struct btrfs_compress_op btrfs_lzo_compress = {
- .workspace_manager = &wsm,
+const struct btrfs_compress_levels btrfs_lzo_compress = {
.max_level = 1,
.default_level = 1,
};
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index 363fd28c0268..a0cf8effe008 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -18,6 +18,7 @@ static const char fs_state_chars[] = {
[BTRFS_FS_STATE_REMOUNTING] = 'M',
[BTRFS_FS_STATE_RO] = 0,
[BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
+ [BTRFS_FS_STATE_LOG_REPLAY_ABORTED] = 'O',
[BTRFS_FS_STATE_DEV_REPLACING] = 'R',
[BTRFS_FS_STATE_DUMMY_FS_INFO] = 0,
[BTRFS_FS_STATE_NO_DATA_CSUMS] = 'C',
diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h
index 022ebc89af85..4416c165644f 100644
--- a/fs/btrfs/messages.h
+++ b/fs/btrfs/messages.h
@@ -4,7 +4,6 @@
#define BTRFS_MESSAGES_H
#include <linux/types.h>
-#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bug.h>
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index ff5eac84d819..60f9b000d644 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -11,6 +11,7 @@
#include <linux/pagemap.h>
#include <linux/math64.h>
#include <linux/rbtree.h>
+#include <linux/bio.h>
/*
* Enumerate bits using enum autoincrement. Define the @name as the n-th bit.
@@ -20,6 +21,54 @@
name = (1U << __ ## name ## _BIT), \
__ ## name ## _SEQ = __ ## name ## _BIT
+static inline phys_addr_t bio_iter_phys(struct bio *bio, struct bvec_iter *iter)
+{
+ struct bio_vec bv = bio_iter_iovec(bio, *iter);
+
+ return bvec_phys(&bv);
+}
+
+/*
+ * Iterate bio using btrfs block size.
+ *
+ * This will handle large folio and highmem.
+ *
+ * @paddr: Physical memory address of each iteration
+ * @bio: The bio to iterate
+ * @iter: The bvec_iter (pointer) to use.
+ * @blocksize: The blocksize to iterate.
+ *
+ * This requires all folios in the bio to cover at least one block.
+ */
+#define btrfs_bio_for_each_block(paddr, bio, iter, blocksize) \
+ for (; (iter)->bi_size && \
+ (paddr = bio_iter_phys((bio), (iter)), 1); \
+ bio_advance_iter_single((bio), (iter), (blocksize)))
+
+/* Initialize a bvec_iter to the size of the specified bio. */
+static inline struct bvec_iter init_bvec_iter_for_bio(struct bio *bio)
+{
+ struct bio_vec *bvec;
+ u32 bio_size = 0;
+ int i;
+
+ bio_for_each_bvec_all(bvec, bio, i)
+ bio_size += bvec->bv_len;
+
+ return (struct bvec_iter) {
+ .bi_sector = 0,
+ .bi_size = bio_size,
+ .bi_idx = 0,
+ .bi_bvec_done = 0,
+ };
+}
+
+#define btrfs_bio_for_each_block_all(paddr, bio, blocksize) \
+ for (struct bvec_iter iter = init_bvec_iter_for_bio(bio); \
+ (iter).bi_size && \
+ (paddr = bio_iter_phys((bio), &(iter)), 1); \
+ bio_advance_iter_single((bio), &(iter), (blocksize)))
+
static inline void cond_wake_up(struct wait_queue_head *wq)
{
/*
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 74e38da9bd39..62b993fae54f 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -6,12 +6,19 @@
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
+#include "file-item.h"
#include "print-tree.h"
#include "accessors.h"
#include "tree-checker.h"
#include "volumes.h"
#include "raid-stripe-tree.h"
+/*
+ * Large enough buffer size for the stringification of any key type yet short
+ * enough to use the stack and avoid allocations.
+ */
+#define KEY_TYPE_BUF_SIZE 32
+
struct root_name_map {
u64 id;
const char *name;
@@ -227,21 +234,209 @@ static void print_eb_refs_lock(const struct extent_buffer *eb)
#endif
}
+static void print_timespec(const struct extent_buffer *eb,
+ struct btrfs_timespec *timespec,
+ const char *prefix, const char *suffix)
+{
+ const u64 secs = btrfs_timespec_sec(eb, timespec);
+ const u32 nsecs = btrfs_timespec_nsec(eb, timespec);
+
+ pr_info("%s%llu.%u%s", prefix, secs, nsecs, suffix);
+}
+
+static void print_inode_item(const struct extent_buffer *eb, int i)
+{
+ struct btrfs_inode_item *ii = btrfs_item_ptr(eb, i, struct btrfs_inode_item);
+
+ pr_info("\t\tinode generation %llu transid %llu size %llu nbytes %llu\n",
+ btrfs_inode_generation(eb, ii), btrfs_inode_transid(eb, ii),
+ btrfs_inode_size(eb, ii), btrfs_inode_nbytes(eb, ii));
+ pr_info("\t\tblock group %llu mode %o links %u uid %u gid %u\n",
+ btrfs_inode_block_group(eb, ii), btrfs_inode_mode(eb, ii),
+ btrfs_inode_nlink(eb, ii), btrfs_inode_uid(eb, ii),
+ btrfs_inode_gid(eb, ii));
+ pr_info("\t\trdev %llu sequence %llu flags 0x%llx\n",
+ btrfs_inode_rdev(eb, ii), btrfs_inode_sequence(eb, ii),
+ btrfs_inode_flags(eb, ii));
+ print_timespec(eb, &ii->atime, "\t\tatime ", "\n");
+ print_timespec(eb, &ii->ctime, "\t\tctime ", "\n");
+ print_timespec(eb, &ii->mtime, "\t\tmtime ", "\n");
+ print_timespec(eb, &ii->otime, "\t\totime ", "\n");
+}
+
+static void print_dir_item(const struct extent_buffer *eb, int i)
+{
+ const u32 size = btrfs_item_size(eb, i);
+ struct btrfs_dir_item *di = btrfs_item_ptr(eb, i, struct btrfs_dir_item);
+ u32 cur = 0;
+
+ while (cur < size) {
+ const u32 name_len = btrfs_dir_name_len(eb, di);
+ const u32 data_len = btrfs_dir_data_len(eb, di);
+ const u32 len = sizeof(*di) + name_len + data_len;
+ struct btrfs_key location;
+
+ btrfs_dir_item_key_to_cpu(eb, di, &location);
+ pr_info("\t\tlocation key (%llu %u %llu) type %d\n",
+ location.objectid, location.type, location.offset,
+ btrfs_dir_ftype(eb, di));
+ pr_info("\t\ttransid %llu data_len %u name_len %u\n",
+ btrfs_dir_transid(eb, di), data_len, name_len);
+ di = (struct btrfs_dir_item *)((char *)di + len);
+ cur += len;
+ }
+}
+
+static void print_inode_ref_item(const struct extent_buffer *eb, int i)
+{
+ const u32 size = btrfs_item_size(eb, i);
+ struct btrfs_inode_ref *ref = btrfs_item_ptr(eb, i, struct btrfs_inode_ref);
+ u32 cur = 0;
+
+ while (cur < size) {
+ const u64 index = btrfs_inode_ref_index(eb, ref);
+ const u32 name_len = btrfs_inode_ref_name_len(eb, ref);
+ const u32 len = sizeof(*ref) + name_len;
+
+ pr_info("\t\tindex %llu name_len %u\n", index, name_len);
+ ref = (struct btrfs_inode_ref *)((char *)ref + len);
+ cur += len;
+ }
+}
+
+static void print_inode_extref_item(const struct extent_buffer *eb, int i)
+{
+ const u32 size = btrfs_item_size(eb, i);
+ struct btrfs_inode_extref *extref;
+ u32 cur = 0;
+
+ extref = btrfs_item_ptr(eb, i, struct btrfs_inode_extref);
+ while (cur < size) {
+ const u64 index = btrfs_inode_extref_index(eb, extref);
+ const u32 name_len = btrfs_inode_extref_name_len(eb, extref);
+ const u64 parent = btrfs_inode_extref_parent(eb, extref);
+ const u32 len = sizeof(*extref) + name_len;
+
+ pr_info("\t\tindex %llu parent %llu name_len %u\n",
+ index, parent, name_len);
+ extref = (struct btrfs_inode_extref *)((char *)extref + len);
+ cur += len;
+ }
+}
+
+static void print_dir_log_index_item(const struct extent_buffer *eb, int i)
+{
+ struct btrfs_dir_log_item *dlog;
+
+ dlog = btrfs_item_ptr(eb, i, struct btrfs_dir_log_item);
+ pr_info("\t\tdir log end %llu\n", btrfs_dir_log_end(eb, dlog));
+}
+
+static void print_extent_csum(const struct extent_buffer *eb, int i)
+{
+ const struct btrfs_fs_info *fs_info = eb->fs_info;
+ const u32 size = btrfs_item_size(eb, i);
+ const u32 csum_bytes = (size / fs_info->csum_size) * fs_info->sectorsize;
+ struct btrfs_key key;
+
+ btrfs_item_key_to_cpu(eb, &key, i);
+ pr_info("\t\trange start %llu end %llu length %u\n",
+ key.offset, key.offset + csum_bytes, csum_bytes);
+}
+
+static void print_file_extent_item(const struct extent_buffer *eb, int i)
+{
+ struct btrfs_file_extent_item *fi;
+
+ fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
+ pr_info("\t\tgeneration %llu type %hhu\n",
+ btrfs_file_extent_generation(eb, fi),
+ btrfs_file_extent_type(eb, fi));
+
+ if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE) {
+ pr_info("\t\tinline extent data size %u ram_bytes %llu compression %hhu\n",
+ btrfs_file_extent_inline_item_len(eb, i),
+ btrfs_file_extent_ram_bytes(eb, fi),
+ btrfs_file_extent_compression(eb, fi));
+ return;
+ }
+
+ pr_info("\t\textent data disk bytenr %llu nr %llu\n",
+ btrfs_file_extent_disk_bytenr(eb, fi),
+ btrfs_file_extent_disk_num_bytes(eb, fi));
+ pr_info("\t\textent data offset %llu nr %llu ram %llu\n",
+ btrfs_file_extent_offset(eb, fi),
+ btrfs_file_extent_num_bytes(eb, fi),
+ btrfs_file_extent_ram_bytes(eb, fi));
+ pr_info("\t\textent compression %hhu\n",
+ btrfs_file_extent_compression(eb, fi));
+}
+
+static void key_type_string(const struct btrfs_key *key, char *buf, int buf_size)
+{
+ static const char *key_to_str[256] = {
+ [BTRFS_INODE_ITEM_KEY] = "INODE_ITEM",
+ [BTRFS_INODE_REF_KEY] = "INODE_REF",
+ [BTRFS_INODE_EXTREF_KEY] = "INODE_EXTREF",
+ [BTRFS_DIR_ITEM_KEY] = "DIR_ITEM",
+ [BTRFS_DIR_INDEX_KEY] = "DIR_INDEX",
+ [BTRFS_DIR_LOG_ITEM_KEY] = "DIR_LOG_ITEM",
+ [BTRFS_DIR_LOG_INDEX_KEY] = "DIR_LOG_INDEX",
+ [BTRFS_XATTR_ITEM_KEY] = "XATTR_ITEM",
+ [BTRFS_VERITY_DESC_ITEM_KEY] = "VERITY_DESC_ITEM",
+ [BTRFS_VERITY_MERKLE_ITEM_KEY] = "VERITY_MERKLE_ITEM",
+ [BTRFS_ORPHAN_ITEM_KEY] = "ORPHAN_ITEM",
+ [BTRFS_ROOT_ITEM_KEY] = "ROOT_ITEM",
+ [BTRFS_ROOT_REF_KEY] = "ROOT_REF",
+ [BTRFS_ROOT_BACKREF_KEY] = "ROOT_BACKREF",
+ [BTRFS_EXTENT_ITEM_KEY] = "EXTENT_ITEM",
+ [BTRFS_METADATA_ITEM_KEY] = "METADATA_ITEM",
+ [BTRFS_TREE_BLOCK_REF_KEY] = "TREE_BLOCK_REF",
+ [BTRFS_SHARED_BLOCK_REF_KEY] = "SHARED_BLOCK_REF",
+ [BTRFS_EXTENT_DATA_REF_KEY] = "EXTENT_DATA_REF",
+ [BTRFS_SHARED_DATA_REF_KEY] = "SHARED_DATA_REF",
+ [BTRFS_EXTENT_OWNER_REF_KEY] = "EXTENT_OWNER_REF",
+ [BTRFS_EXTENT_CSUM_KEY] = "EXTENT_CSUM",
+ [BTRFS_EXTENT_DATA_KEY] = "EXTENT_DATA",
+ [BTRFS_BLOCK_GROUP_ITEM_KEY] = "BLOCK_GROUP_ITEM",
+ [BTRFS_FREE_SPACE_INFO_KEY] = "FREE_SPACE_INFO",
+ [BTRFS_FREE_SPACE_EXTENT_KEY] = "FREE_SPACE_EXTENT",
+ [BTRFS_FREE_SPACE_BITMAP_KEY] = "FREE_SPACE_BITMAP",
+ [BTRFS_CHUNK_ITEM_KEY] = "CHUNK_ITEM",
+ [BTRFS_DEV_ITEM_KEY] = "DEV_ITEM",
+ [BTRFS_DEV_EXTENT_KEY] = "DEV_EXTENT",
+ [BTRFS_TEMPORARY_ITEM_KEY] = "TEMPORARY_ITEM",
+ [BTRFS_DEV_REPLACE_KEY] = "DEV_REPLACE",
+ [BTRFS_STRING_ITEM_KEY] = "STRING_ITEM",
+ [BTRFS_QGROUP_STATUS_KEY] = "QGROUP_STATUS",
+ [BTRFS_QGROUP_RELATION_KEY] = "QGROUP_RELATION",
+ [BTRFS_QGROUP_INFO_KEY] = "QGROUP_INFO",
+ [BTRFS_QGROUP_LIMIT_KEY] = "QGROUP_LIMIT",
+ [BTRFS_PERSISTENT_ITEM_KEY] = "PERSISTENT_ITEM",
+ [BTRFS_UUID_KEY_SUBVOL] = "UUID_KEY_SUBVOL",
+ [BTRFS_UUID_KEY_RECEIVED_SUBVOL] = "UUID_KEY_RECEIVED_SUBVOL",
+ [BTRFS_RAID_STRIPE_KEY] = "RAID_STRIPE",
+ };
+
+ if (key->type == 0 && key->objectid == BTRFS_FREE_SPACE_OBJECTID)
+ scnprintf(buf, buf_size, "UNTYPED");
+ else if (key_to_str[key->type])
+ scnprintf(buf, buf_size, "%s", key_to_str[key->type]); /* table entry is data, never the format */
+ else
+ scnprintf(buf, buf_size, "UNKNOWN.%d", key->type);
+}
+
void btrfs_print_leaf(const struct extent_buffer *l)
{
struct btrfs_fs_info *fs_info;
int i;
u32 type, nr;
struct btrfs_root_item *ri;
- struct btrfs_dir_item *di;
- struct btrfs_inode_item *ii;
struct btrfs_block_group_item *bi;
- struct btrfs_file_extent_item *fi;
struct btrfs_extent_data_ref *dref;
struct btrfs_shared_data_ref *sref;
struct btrfs_dev_extent *dev_extent;
struct btrfs_key key;
- struct btrfs_key found_key;
if (!l)
return;
@@ -255,25 +450,35 @@ void btrfs_print_leaf(const struct extent_buffer *l)
btrfs_leaf_free_space(l), btrfs_header_owner(l));
print_eb_refs_lock(l);
for (i = 0 ; i < nr ; i++) {
+ char key_buf[KEY_TYPE_BUF_SIZE];
+
btrfs_item_key_to_cpu(l, &key, i);
type = key.type;
- pr_info("\titem %d key (%llu %u %llu) itemoff %d itemsize %d\n",
- i, key.objectid, type, key.offset,
+ key_type_string(&key, key_buf, KEY_TYPE_BUF_SIZE);
+
+ pr_info("\titem %d key (%llu %s %llu) itemoff %d itemsize %d\n",
+ i, key.objectid, key_buf, key.offset,
btrfs_item_offset(l, i), btrfs_item_size(l, i));
switch (type) {
case BTRFS_INODE_ITEM_KEY:
- ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
- pr_info("\t\tinode generation %llu size %llu mode %o\n",
- btrfs_inode_generation(l, ii),
- btrfs_inode_size(l, ii),
- btrfs_inode_mode(l, ii));
+ print_inode_item(l, i);
+ break;
+ case BTRFS_INODE_REF_KEY:
+ print_inode_ref_item(l, i);
+ break;
+ case BTRFS_INODE_EXTREF_KEY:
+ print_inode_extref_item(l, i);
break;
case BTRFS_DIR_ITEM_KEY:
- di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
- btrfs_dir_item_key_to_cpu(l, di, &found_key);
- pr_info("\t\tdir oid %llu flags %u\n",
- found_key.objectid,
- btrfs_dir_flags(l, di));
+ case BTRFS_DIR_INDEX_KEY:
+ case BTRFS_XATTR_ITEM_KEY:
+ print_dir_item(l, i);
+ break;
+ case BTRFS_DIR_LOG_INDEX_KEY:
+ print_dir_log_index_item(l, i);
+ break;
+ case BTRFS_EXTENT_CSUM_KEY:
+ print_extent_csum(l, i);
break;
case BTRFS_ROOT_ITEM_KEY:
ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
@@ -303,24 +508,7 @@ void btrfs_print_leaf(const struct extent_buffer *l)
btrfs_shared_data_ref_count(l, sref));
break;
case BTRFS_EXTENT_DATA_KEY:
- fi = btrfs_item_ptr(l, i,
- struct btrfs_file_extent_item);
- pr_info("\t\tgeneration %llu type %hhu\n",
- btrfs_file_extent_generation(l, fi),
- btrfs_file_extent_type(l, fi));
- if (btrfs_file_extent_type(l, fi) ==
- BTRFS_FILE_EXTENT_INLINE) {
- pr_info("\t\tinline extent data size %llu\n",
- btrfs_file_extent_ram_bytes(l, fi));
- break;
- }
- pr_info("\t\textent data disk bytenr %llu nr %llu\n",
- btrfs_file_extent_disk_bytenr(l, fi),
- btrfs_file_extent_disk_num_bytes(l, fi));
- pr_info("\t\textent data offset %llu nr %llu ram %llu\n",
- btrfs_file_extent_offset(l, fi),
- btrfs_file_extent_num_bytes(l, fi),
- btrfs_file_extent_ram_bytes(l, fi));
+ print_file_extent_item(l, i);
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index ccaa9a3cf1ce..1175b8192cd7 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1069,7 +1069,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
}
path = btrfs_alloc_path();
- if (!path) {
+ if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out_free_root;
@@ -1081,7 +1081,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*ptr));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@@ -1111,7 +1111,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
if (ret > 0)
goto out_add_root;
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@@ -1129,7 +1129,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
/* We should not have a stray @prealloc pointer. */
ASSERT(prealloc == NULL);
prealloc = kzalloc(sizeof(*prealloc), GFP_NOFS);
- if (!prealloc) {
+ if (unlikely(!prealloc)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out_free_path;
@@ -1137,7 +1137,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
ret = add_qgroup_item(trans, quota_root,
found_key.offset);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@@ -1145,13 +1145,13 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
qgroup = add_qgroup_rb(fs_info, prealloc, found_key.offset);
prealloc = NULL;
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
ret = btrfs_search_slot_for_read(tree_root, &found_key,
path, 1, 0);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@@ -1165,7 +1165,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
}
}
ret = btrfs_next_item(tree_root, path);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@@ -1176,7 +1176,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
out_add_root:
btrfs_release_path(path);
ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@@ -1190,7 +1190,7 @@ out_add_root:
qgroup = add_qgroup_rb(fs_info, prealloc, BTRFS_FS_TREE_OBJECTID);
prealloc = NULL;
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@@ -1376,13 +1376,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
btrfs_free_qgroup_config(fs_info);
ret = btrfs_clean_quota_tree(trans, quota_root);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_del_root(trans, &quota_root->root_key);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1455,6 +1455,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root,
struct btrfs_qgroup *qgroup;
LIST_HEAD(qgroup_list);
u64 num_bytes = src->excl;
+ u64 num_bytes_cmpr = src->excl_cmpr;
int ret = 0;
qgroup = find_qgroup_rb(fs_info, ref_root);
@@ -1466,11 +1467,12 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root,
struct btrfs_qgroup_list *glist;
qgroup->rfer += sign * num_bytes;
- qgroup->rfer_cmpr += sign * num_bytes;
+ qgroup->rfer_cmpr += sign * num_bytes_cmpr;
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
+ WARN_ON(sign < 0 && qgroup->excl_cmpr < num_bytes_cmpr);
qgroup->excl += sign * num_bytes;
- qgroup->excl_cmpr += sign * num_bytes;
+ qgroup->excl_cmpr += sign * num_bytes_cmpr;
if (sign > 0)
qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
@@ -2424,9 +2426,9 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
int i;
/* Level sanity check */
- if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
- root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
- root_level < cur_level) {
+ if (unlikely(cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
+ root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
+ root_level < cur_level)) {
btrfs_err_rl(fs_info,
"%s: bad levels, cur_level=%d root_level=%d",
__func__, cur_level, root_level);
@@ -2442,7 +2444,7 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
* dst_path->nodes[root_level] must be initialized before
* calling this function.
*/
- if (cur_level == root_level) {
+ if (unlikely(cur_level == root_level)) {
btrfs_err_rl(fs_info,
"%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d",
__func__, root_level, root_level, cur_level);
@@ -2528,7 +2530,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
return 0;
/* Wrong parameter order */
- if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) {
+ if (unlikely(btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb))) {
btrfs_err_rl(fs_info,
"%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
btrfs_header_generation(src_eb),
@@ -2536,7 +2538,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
return -EUCLEAN;
}
- if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
+ if (unlikely(!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb))) {
ret = -EIO;
goto out;
}
@@ -2727,7 +2729,7 @@ static void qgroup_iterator_nested_clean(struct list_head *head)
*/
static void qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
struct ulist *roots, struct list_head *qgroups,
- u64 seq, int update_old)
+ u64 seq, bool update_old)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
@@ -4708,8 +4710,8 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root,
if (!btrfs_qgroup_full_accounting(fs_info))
return 0;
- if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) >
- btrfs_node_ptr_generation(reloc_parent, reloc_slot)) {
+ if (unlikely(btrfs_node_ptr_generation(subvol_parent, subvol_slot) >
+ btrfs_node_ptr_generation(reloc_parent, reloc_slot))) {
btrfs_err_rl(fs_info,
"%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu",
__func__,
@@ -4841,7 +4843,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
reloc_eb = NULL;
goto free_out;
}
- if (!extent_buffer_uptodate(reloc_eb)) {
+ if (unlikely(!extent_buffer_uptodate(reloc_eb))) {
ret = -EIO;
goto free_out;
}
diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
index cab0b291088c..cc6f6095cc9f 100644
--- a/fs/btrfs/raid-stripe-tree.c
+++ b/fs/btrfs/raid-stripe-tree.c
@@ -67,7 +67,7 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *stripe_root = fs_info->stripe_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct extent_buffer *leaf;
u64 found_start;
@@ -260,7 +260,6 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
btrfs_release_path(path);
}
- btrfs_free_path(path);
return ret;
}
@@ -269,7 +268,7 @@ static int update_raid_extent_item(struct btrfs_trans_handle *trans,
struct btrfs_stripe_extent *stripe_extent,
const size_t item_size)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
int ret;
int slot;
@@ -288,7 +287,6 @@ static int update_raid_extent_item(struct btrfs_trans_handle *trans,
write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
item_size);
- btrfs_free_path(path);
return ret;
}
@@ -306,7 +304,7 @@ int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
int ret;
stripe_extent = kzalloc(item_size, GFP_NOFS);
- if (!stripe_extent) {
+ if (unlikely(!stripe_extent)) {
btrfs_abort_transaction(trans, -ENOMEM);
btrfs_end_transaction(trans);
return -ENOMEM;
@@ -376,7 +374,7 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
struct btrfs_stripe_extent *stripe_extent;
struct btrfs_key stripe_key;
struct btrfs_key found_key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
const u64 end = logical + *length;
int num_stripes;
@@ -402,7 +400,7 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
if (ret < 0)
- goto free_path;
+ return ret;
if (ret) {
if (path->slots[0] != 0)
path->slots[0]--;
@@ -459,8 +457,7 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
trace_btrfs_get_raid_extent_offset(fs_info, logical, *length,
stripe->physical, devid);
- ret = 0;
- goto free_path;
+ return 0;
}
/* If we're here, we haven't found the requested devid in the stripe. */
@@ -474,8 +471,6 @@ out:
logical, logical + *length, stripe->dev->devid,
btrfs_bg_type_to_raid_name(map_type));
}
-free_path:
- btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 3ff2bedfb3a4..0135dceb7baa 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1167,7 +1167,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
/* Check if we have reached tolerance early. */
found_errors = get_rbio_veritical_errors(rbio, sector_nr,
NULL, NULL);
- if (found_errors > rbio->bioc->max_errors)
+ if (unlikely(found_errors > rbio->bioc->max_errors))
return -EIO;
return 0;
}
@@ -1208,17 +1208,16 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
const u32 sectorsize_bits = rbio->bioc->fs_info->sectorsize_bits;
struct bvec_iter iter = bio->bi_iter;
+ phys_addr_t paddr;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
rbio->bioc->full_stripe_logical;
- while (iter.bi_size) {
+ btrfs_bio_for_each_block(paddr, bio, &iter, sectorsize) {
unsigned int index = (offset >> sectorsize_bits);
struct sector_ptr *sector = &rbio->bio_sectors[index];
- struct bio_vec bv = bio_iter_iovec(bio, iter);
sector->has_paddr = true;
- sector->paddr = bvec_phys(&bv);
- bio_advance_iter_single(bio, &iter, sectorsize);
+ sector->paddr = paddr;
offset += sectorsize;
}
}
@@ -1511,22 +1510,17 @@ static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
*/
static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
{
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ const u32 blocksize = rbio->bioc->fs_info->sectorsize;
+ phys_addr_t paddr;
ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct sector_ptr *sector;
- phys_addr_t paddr = bvec_phys(bvec);
+ btrfs_bio_for_each_block_all(paddr, bio, blocksize) {
+ struct sector_ptr *sector = find_stripe_sector(rbio, paddr);
- for (u32 off = 0; off < bvec->bv_len; off += sectorsize) {
- sector = find_stripe_sector(rbio, paddr + off);
- ASSERT(sector);
- if (sector)
- sector->uptodate = 1;
- }
+ ASSERT(sector);
+ if (sector)
+ sector->uptodate = 1;
}
}
@@ -1573,8 +1567,7 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
int total_sector_nr = get_bio_sector_nr(rbio, bio);
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ phys_addr_t paddr;
/* No data csum for the whole stripe, no need to verify. */
if (!rbio->csum_bitmap || !rbio->csum_buf)
@@ -1584,27 +1577,20 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors)
return;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- void *kaddr;
-
- kaddr = bvec_kmap_local(bvec);
- for (u32 off = 0; off < bvec->bv_len;
- off += fs_info->sectorsize, total_sector_nr++) {
- u8 csum_buf[BTRFS_CSUM_SIZE];
- u8 *expected_csum = rbio->csum_buf +
- total_sector_nr * fs_info->csum_size;
- int ret;
+ btrfs_bio_for_each_block_all(paddr, bio, fs_info->sectorsize) {
+ u8 csum_buf[BTRFS_CSUM_SIZE];
+ u8 *expected_csum = rbio->csum_buf + total_sector_nr * fs_info->csum_size;
+ int ret;
- /* No csum for this sector, skip to the next sector. */
- if (!test_bit(total_sector_nr, rbio->csum_bitmap))
- continue;
- ret = btrfs_check_sector_csum(fs_info, kaddr + off,
- csum_buf, expected_csum);
- if (ret < 0)
- set_bit(total_sector_nr, rbio->error_bitmap);
- }
- kunmap_local(kaddr);
+ /* Blocks without a csum are not verified, but must still advance the index. */
+ if (test_bit(total_sector_nr, rbio->csum_bitmap)) {
+ ret = btrfs_check_block_csum(fs_info, paddr,
+ csum_buf, expected_csum);
+ if (ret < 0)
+ set_bit(total_sector_nr, rbio->error_bitmap);
+ }
+ total_sector_nr++;
}
}
@@ -1802,7 +1788,6 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
struct sector_ptr *sector;
u8 csum_buf[BTRFS_CSUM_SIZE];
u8 *csum_expected;
- void *kaddr;
int ret;
if (!rbio->csum_bitmap || !rbio->csum_buf)
@@ -1824,9 +1809,7 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
csum_expected = rbio->csum_buf +
(stripe_nr * rbio->stripe_nsectors + sector_nr) *
fs_info->csum_size;
- kaddr = kmap_local_sector(sector);
- ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, csum_expected);
- kunmap_local(kaddr);
+ ret = btrfs_check_block_csum(fs_info, sector->paddr, csum_buf, csum_expected);
return ret;
}
@@ -1864,7 +1847,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
if (!found_errors)
return 0;
- if (found_errors > rbio->bioc->max_errors)
+ if (unlikely(found_errors > rbio->bioc->max_errors))
return -EIO;
/*
@@ -2416,7 +2399,7 @@ static void rmw_rbio(struct btrfs_raid_bio *rbio)
int found_errors;
found_errors = get_rbio_veritical_errors(rbio, sectornr, NULL, NULL);
- if (found_errors > rbio->bioc->max_errors) {
+ if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
break;
}
@@ -2705,7 +2688,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
found_errors = get_rbio_veritical_errors(rbio, sector_nr,
&faila, &failb);
- if (found_errors > rbio->bioc->max_errors) {
+ if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
goto out;
}
@@ -2729,7 +2712,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
* data, so the capability of the repair is declined. (In the
* case of RAID5, we can not repair anything.)
*/
- if (dfail > rbio->bioc->max_errors - 1) {
+ if (unlikely(dfail > rbio->bioc->max_errors - 1)) {
ret = -EIO;
goto out;
}
@@ -2746,7 +2729,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
* scrubbing parity, luckily, use the other one to repair the
* data, or we can not repair the data stripe.
*/
- if (failp != rbio->scrubp) {
+ if (unlikely(failp != rbio->scrubp)) {
ret = -EIO;
goto out;
}
@@ -2837,7 +2820,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio)
int found_errors;
found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL);
- if (found_errors > rbio->bioc->max_errors) {
+ if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
break;
}
@@ -2861,19 +2844,22 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
* This is for scrub call sites where we already have correct data contents.
* This allows us to avoid reading data stripes again.
*
- * Unfortunately here we have to do page copy, other than reusing the pages.
+ * Unfortunately here we have to do a folio copy, rather than reusing the pages.
* This is due to the fact rbio has its own page management for its cache.
*/
-void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
- struct page **data_pages, u64 data_logical)
+void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
+ struct folio **data_folios, u64 data_logical)
{
+ struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
const u64 offset_in_full_stripe = data_logical -
rbio->bioc->full_stripe_logical;
- const int page_index = offset_in_full_stripe >> PAGE_SHIFT;
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- const u32 sectors_per_page = PAGE_SIZE / sectorsize;
+ unsigned int findex = 0;
+ unsigned int foffset = 0;
int ret;
+ /* We shouldn't hit RAID56 for bs > ps cases for now. */
+ ASSERT(fs_info->sectorsize <= PAGE_SIZE);
+
/*
* If we hit ENOMEM temporarily, but later at
* raid56_parity_submit_scrub_rbio() time it succeeded, we just do
@@ -2890,14 +2876,25 @@ void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN));
ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT));
- for (int page_nr = 0; page_nr < (BTRFS_STRIPE_LEN >> PAGE_SHIFT); page_nr++) {
- struct page *dst = rbio->stripe_pages[page_nr + page_index];
- struct page *src = data_pages[page_nr];
+ for (unsigned int cur_off = offset_in_full_stripe;
+ cur_off < offset_in_full_stripe + BTRFS_STRIPE_LEN;
+ cur_off += PAGE_SIZE) {
+ const unsigned int pindex = cur_off >> PAGE_SHIFT;
+ void *kaddr;
+
+ kaddr = kmap_local_page(rbio->stripe_pages[pindex]);
+ memcpy_from_folio(kaddr, data_folios[findex], foffset, PAGE_SIZE);
+ kunmap_local(kaddr);
- memcpy_page(dst, 0, src, 0, PAGE_SIZE);
- for (int sector_nr = sectors_per_page * page_index;
- sector_nr < sectors_per_page * (page_index + 1);
- sector_nr++)
- rbio->stripe_sectors[sector_nr].uptodate = true;
+ foffset += PAGE_SIZE;
+ ASSERT(foffset <= folio_size(data_folios[findex]));
+ if (foffset == folio_size(data_folios[findex])) {
+ findex++;
+ foffset = 0;
+ }
}
+ for (unsigned int sector_nr = offset_in_full_stripe >> fs_info->sectorsize_bits;
+ sector_nr < (offset_in_full_stripe + BTRFS_STRIPE_LEN) >> fs_info->sectorsize_bits;
+ sector_nr++)
+ rbio->stripe_sectors[sector_nr].uptodate = true;
}
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 0d7b4c2fb6ae..84c4d1d29c7a 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -201,8 +201,8 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
unsigned long *dbitmap, int stripe_nsectors);
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
-void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
- struct page **data_pages, u64 data_logical);
+void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
+ struct folio **data_folios, u64 data_logical);
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 3871c3a6c743..de4cb0f3fbd0 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -971,7 +971,7 @@ void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *extent_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *eb;
int tree_block_level = 0;
u64 bytenr = 0, num_bytes = 0;
@@ -980,11 +980,18 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
if (!btrfs_test_opt(fs_info, REF_VERIFY))
return 0;
+ extent_root = btrfs_extent_root(fs_info, 0);
+ /* If the extent tree is damaged we cannot ignore it (IGNOREBADROOTS). */
+ if (IS_ERR(extent_root)) {
+ btrfs_warn(fs_info, "ref-verify: extent tree not available, disabling");
+ btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
+ return 0;
+ }
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- extent_root = btrfs_extent_root(fs_info, 0);
eb = btrfs_read_lock_root_node(extent_root);
level = btrfs_header_level(eb);
path->nodes[level] = eb;
@@ -1014,6 +1021,5 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
btrfs_free_ref_cache(fs_info);
btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
}
- btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/ref-verify.h b/fs/btrfs/ref-verify.h
index 559bd25a2b7a..1ce544d53cc5 100644
--- a/fs/btrfs/ref-verify.h
+++ b/fs/btrfs/ref-verify.h
@@ -12,7 +12,7 @@
struct btrfs_fs_info;
struct btrfs_ref;
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+#ifdef CONFIG_BTRFS_DEBUG
#include <linux/spinlock.h>
@@ -53,6 +53,6 @@ static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info)
{
}
-#endif /* CONFIG_BTRFS_FS_REF_VERIFY */
+#endif /* CONFIG_BTRFS_DEBUG */
#endif
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index ce25ab7f0e99..5465a5eae9b2 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -23,7 +23,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
u64 endoff,
const u64 destoff,
const u64 olen,
- int no_time_update)
+ bool no_time_update)
{
int ret;
@@ -43,7 +43,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
}
ret = btrfs_update_inode(trans, BTRFS_I(inode));
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@@ -268,12 +268,12 @@ copy_inline_extent:
drop_args.end = aligned_end;
drop_args.drop_cache = true;
ret = btrfs_drop_extents(trans, root, inode, &drop_args);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -285,7 +285,7 @@ copy_inline_extent:
btrfs_update_inode_bytes(inode, datal, drop_args.bytes_found);
btrfs_set_inode_full_sync(inode);
ret = btrfs_inode_set_file_extent_range(inode, 0, aligned_end);
- if (ret)
+ if (unlikely(ret))
btrfs_abort_transaction(trans, ret);
out:
if (!ret && !trans) {
@@ -337,10 +337,10 @@ copy_to_page:
*/
static int btrfs_clone(struct inode *src, struct inode *inode,
const u64 off, const u64 olen, const u64 olen_aligned,
- const u64 destoff, int no_time_update)
+ const u64 destoff, bool no_time_update)
{
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_trans_handle *trans;
char *buf = NULL;
@@ -611,7 +611,6 @@ process_slot:
}
out:
- btrfs_free_path(path);
kvfree(buf);
clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 7256f6748c8f..8dd8de6b9fb8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -821,7 +821,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
u64 bytenr, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
int ret;
@@ -834,11 +834,9 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
ret = btrfs_lookup_file_extent(NULL, root, path,
btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
if (ret < 0)
- goto out;
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
+ return ret;
+ if (ret > 0)
+ return -ENOENT;
leaf = path->nodes[0];
fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -849,16 +847,11 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi));
- if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
- ret = -EINVAL;
- goto out;
- }
+ if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi))
+ return -EINVAL;
*new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
/*
@@ -974,7 +967,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
btrfs_init_data_ref(&ref, key.objectid, key.offset,
btrfs_root_id(root), false);
ret = btrfs_inc_extent_ref(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -988,7 +981,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
btrfs_init_data_ref(&ref, key.objectid, key.offset,
btrfs_root_id(root), false);
ret = btrfs_free_extent(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -1199,7 +1192,7 @@ again:
ref.ref_root = btrfs_root_id(src);
btrfs_init_tree_ref(&ref, level - 1, 0, true);
ret = btrfs_inc_extent_ref(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -1212,7 +1205,7 @@ again:
ref.ref_root = btrfs_root_id(dest);
btrfs_init_tree_ref(&ref, level - 1, 0, true);
ret = btrfs_inc_extent_ref(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -1226,7 +1219,7 @@ again:
ref.ref_root = btrfs_root_id(src);
btrfs_init_tree_ref(&ref, level - 1, 0, true);
ret = btrfs_free_extent(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -1240,7 +1233,7 @@ again:
ref.ref_root = btrfs_root_id(dest);
btrfs_init_tree_ref(&ref, level - 1, 0, true);
ret = btrfs_free_extent(trans, &ref);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@@ -1490,7 +1483,7 @@ static int clean_dirty_subvols(struct reloc_control *rc)
* ->reloc_root. If it fails however we must
* drop the ref ourselves.
*/
- ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
+ ret2 = btrfs_drop_snapshot(reloc_root, false, true);
if (ret2 < 0) {
btrfs_put_root(reloc_root);
if (!ret)
@@ -1500,7 +1493,7 @@ static int clean_dirty_subvols(struct reloc_control *rc)
btrfs_put_root(root);
} else {
/* Orphan reloc tree, just clean it up */
- ret2 = btrfs_drop_snapshot(root, 0, 1);
+ ret2 = btrfs_drop_snapshot(root, false, true);
if (ret2 < 0) {
btrfs_put_root(root);
if (!ret)
@@ -1791,7 +1784,7 @@ again:
list_add(&reloc_root->root_list, &reloc_roots);
btrfs_put_root(root);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
if (!err)
err = ret;
@@ -1960,7 +1953,7 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
DEBUG_WARN("error %ld reading root for reloc root", PTR_ERR(root));
return PTR_ERR(root);
}
- if (root->reloc_root != reloc_root) {
+ if (unlikely(root->reloc_root != reloc_root)) {
DEBUG_WARN("unexpected reloc root found");
btrfs_err(fs_info,
"root %llu has two reloc roots associated with it",
@@ -2031,7 +2024,7 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
if (!root)
return ERR_PTR(-ENOENT);
- if (next->new_bytenr) {
+ if (unlikely(next->new_bytenr)) {
/*
* We just created the reloc root, so we shouldn't have
* ->new_bytenr set yet. If it is then we have multiple roots
@@ -2090,7 +2083,7 @@ struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
* This can occur if we have incomplete extent refs leading all
* the way up a particular path, in this case return -EUCLEAN.
*/
- if (!root)
+ if (unlikely(!root))
return ERR_PTR(-EUCLEAN);
/* No other choice for non-shareable tree */
@@ -2277,7 +2270,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
bytenr = btrfs_node_blockptr(upper->eb, slot);
if (lowest) {
- if (bytenr != node->bytenr) {
+ if (unlikely(bytenr != node->bytenr)) {
btrfs_err(root->fs_info,
"lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
bytenr, node->bytenr, slot,
@@ -2332,7 +2325,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
if (!ret)
ret = btrfs_drop_subtree(trans, root, eb,
upper->eb);
- if (ret)
+ if (unlikely(ret))
btrfs_abort_transaction(trans, ret);
}
next:
@@ -2454,7 +2447,7 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
eb = read_tree_block(fs_info, block->bytenr, &check);
if (IS_ERR(eb))
return PTR_ERR(eb);
- if (!extent_buffer_uptodate(eb)) {
+ if (unlikely(!extent_buffer_uptodate(eb))) {
free_extent_buffer(eb);
return -EIO;
}
@@ -2519,7 +2512,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
* normal user in the case of corruption.
*/
ASSERT(node->new_bytenr == 0);
- if (node->new_bytenr) {
+ if (unlikely(node->new_bytenr)) {
btrfs_err(root->fs_info,
"bytenr %llu has improper references to it",
node->bytenr);
@@ -2839,7 +2832,7 @@ again:
if (!folio_test_uptodate(folio)) {
btrfs_read_folio(NULL, folio);
folio_lock(folio);
- if (!folio_test_uptodate(folio)) {
+ if (unlikely(!folio_test_uptodate(folio))) {
ret = -EIO;
goto release_folio;
}
@@ -3158,7 +3151,7 @@ static int __add_tree_block(struct reloc_control *rc,
struct rb_root *blocks)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
int ret;
bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
@@ -3186,7 +3179,7 @@ again:
path->skip_locking = 1;
ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
if (ret < 0)
- goto out;
+ return ret;
if (ret > 0 && skinny) {
if (path->slots[0]) {
@@ -3213,14 +3206,10 @@ again:
"tree block extent item (%llu) is not found in extent tree",
bytenr);
WARN_ON(1);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
- ret = add_tree_block(rc, &key, path, blocks);
-out:
- btrfs_free_path(path);
- return ret;
+ return add_tree_block(rc, &key, path, blocks);
}
static int delete_block_group_cache(struct btrfs_block_group *block_group,
@@ -3510,7 +3499,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
struct rb_root blocks = RB_ROOT;
struct btrfs_key key;
struct btrfs_trans_handle *trans = NULL;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_extent_item *ei;
u64 flags;
int ret;
@@ -3679,14 +3668,13 @@ out_free:
if (ret < 0 && !err)
err = ret;
btrfs_free_block_rsv(fs_info, rc->block_rsv);
- btrfs_free_path(path);
return err;
}
static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_inode_item *item;
struct extent_buffer *leaf;
int ret;
@@ -3697,7 +3685,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_inode(trans, root, path, objectid);
if (ret)
- goto out;
+ return ret;
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
@@ -3707,15 +3695,13 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
BTRFS_INODE_PREALLOC);
-out:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
static void delete_orphan_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
int ret = 0;
@@ -3738,7 +3724,6 @@ static void delete_orphan_inode(struct btrfs_trans_handle *trans,
out:
if (ret)
btrfs_abort_transaction(trans, ret);
- btrfs_free_path(path);
}
/*
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index e22e6b06927a..d07eab70f759 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -85,7 +85,7 @@ int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
* Key with offset -1 found, there would have to exist a root
* with such id, but this is out of the valid range.
*/
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
ret = -EUCLEAN;
goto out;
}
@@ -130,7 +130,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
*item)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *l;
int ret;
int slot;
@@ -143,15 +143,15 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
if (ret < 0)
- goto out;
+ return ret;
- if (ret > 0) {
+ if (unlikely(ret > 0)) {
btrfs_crit(fs_info,
"unable to find root key (%llu %u %llu) in tree %llu",
key->objectid, key->type, key->offset, btrfs_root_id(root));
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
l = path->nodes[0];
@@ -168,22 +168,22 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, key, path,
-1, 1);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
ret = btrfs_del_item(trans, root, path);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path,
key, sizeof(*item));
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
l = path->nodes[0];
slot = path->slots[0];
@@ -197,8 +197,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
write_extent_buffer(l, item, ptr, sizeof(*item));
-out:
- btrfs_free_path(path);
return ret;
}
@@ -216,7 +214,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *tree_root = fs_info->tree_root;
struct extent_buffer *leaf;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_root *root;
int err = 0;
@@ -309,7 +307,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
btrfs_put_root(root);
}
- btrfs_free_path(path);
return err;
}
@@ -318,7 +315,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans,
const struct btrfs_key *key)
{
struct btrfs_root *root = trans->fs_info->tree_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int ret;
path = btrfs_alloc_path();
@@ -326,17 +323,12 @@ int btrfs_del_root(struct btrfs_trans_handle *trans,
return -ENOMEM;
ret = btrfs_search_slot(trans, root, key, path, -1, 1);
if (ret < 0)
- goto out;
- if (ret != 0) {
+ return ret;
+ if (unlikely(ret > 0))
/* The root must exist but we did not find it by the key. */
- ret = -EUCLEAN;
- goto out;
- }
+ return -EUCLEAN;
- ret = btrfs_del_item(trans, root, path);
-out:
- btrfs_free_path(path);
- return ret;
+ return btrfs_del_item(trans, root, path);
}
int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
@@ -344,7 +336,7 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
const struct fscrypt_str *name)
{
struct btrfs_root *tree_root = trans->fs_info->tree_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
struct btrfs_key key;
@@ -361,7 +353,7 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0) {
- goto out;
+ return ret;
} else if (ret == 0) {
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
@@ -369,18 +361,16 @@ again:
ptr = (unsigned long)(ref + 1);
if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
(btrfs_root_ref_name_len(leaf, ref) != name->len) ||
- memcmp_extent_buffer(leaf, name->name, ptr, name->len)) {
- ret = -ENOENT;
- goto out;
- }
+ memcmp_extent_buffer(leaf, name->name, ptr, name->len))
+ return -ENOENT;
+
*sequence = btrfs_root_ref_sequence(leaf, ref);
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
- goto out;
+ return ret;
} else {
- ret = -ENOENT;
- goto out;
+ return -ENOENT;
}
if (key.type == BTRFS_ROOT_BACKREF_KEY) {
@@ -391,8 +381,6 @@ again:
goto again;
}
-out:
- btrfs_free_path(path);
return ret;
}
@@ -418,7 +406,7 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
struct btrfs_root *tree_root = trans->fs_info->tree_root;
struct btrfs_key key;
int ret;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
unsigned long ptr;
@@ -433,9 +421,8 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
again:
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
sizeof(*ref) + name->len);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
- btrfs_free_path(path);
return ret;
}
@@ -455,7 +442,6 @@ again:
goto again;
}
- btrfs_free_path(path);
return 0;
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6776e6ab8d10..4691d0bdb2e8 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -113,7 +113,7 @@ enum {
/* Which blocks are covered by extent items. */
scrub_bitmap_nr_has_extent = 0,
- /* Which blocks are meteadata. */
+ /* Which blocks are metadata. */
scrub_bitmap_nr_is_metadata,
/*
@@ -130,7 +130,7 @@ enum {
scrub_bitmap_nr_last,
};
-#define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE)
+#define SCRUB_STRIPE_MAX_FOLIOS (BTRFS_STRIPE_LEN / PAGE_SIZE)
/*
* Represent one contiguous range with a length of BTRFS_STRIPE_LEN.
@@ -139,7 +139,7 @@ struct scrub_stripe {
struct scrub_ctx *sctx;
struct btrfs_block_group *bg;
- struct page *pages[SCRUB_STRIPE_PAGES];
+ struct folio *folios[SCRUB_STRIPE_MAX_FOLIOS];
struct scrub_sector_verification *sectors;
struct btrfs_device *dev;
@@ -206,7 +206,7 @@ struct scrub_ctx {
ktime_t throttle_deadline;
u64 throttle_sent;
- int is_dev_replace;
+ bool is_dev_replace;
u64 write_pointer;
struct mutex wr_lock;
@@ -339,10 +339,10 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
if (!stripe)
return;
- for (int i = 0; i < SCRUB_STRIPE_PAGES; i++) {
- if (stripe->pages[i])
- __free_page(stripe->pages[i]);
- stripe->pages[i] = NULL;
+ for (int i = 0; i < SCRUB_STRIPE_MAX_FOLIOS; i++) {
+ if (stripe->folios[i])
+ folio_put(stripe->folios[i]);
+ stripe->folios[i] = NULL;
}
kfree(stripe->sectors);
kfree(stripe->csums);
@@ -355,6 +355,7 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
struct scrub_stripe *stripe)
{
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
int ret;
memset(stripe, 0, sizeof(*stripe));
@@ -367,7 +368,9 @@ static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
atomic_set(&stripe->pending_io, 0);
spin_lock_init(&stripe->write_error_lock);
- ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, false);
+ ASSERT(BTRFS_STRIPE_LEN >> min_folio_shift <= SCRUB_STRIPE_MAX_FOLIOS);
+ ret = btrfs_alloc_folio_array(BTRFS_STRIPE_LEN >> min_folio_shift,
+ fs_info->block_min_order, stripe->folios);
if (ret < 0)
goto error;
@@ -446,7 +449,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx)
}
static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
- struct btrfs_fs_info *fs_info, int is_dev_replace)
+ struct btrfs_fs_info *fs_info, bool is_dev_replace)
{
struct scrub_ctx *sctx;
int i;
@@ -585,7 +588,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
bool is_super, u64 logical, u64 physical)
{
struct btrfs_fs_info *fs_info = dev->fs_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key found_key;
struct extent_buffer *eb;
struct btrfs_extent_item *ei;
@@ -612,7 +615,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
&flags);
if (ret < 0)
- goto out;
+ return;
swarn.extent_item_size = found_key.offset;
@@ -658,9 +661,6 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
iterate_extent_inodes(&ctx, true, scrub_print_warning_inode, &swarn);
}
-
-out:
- btrfs_free_path(path);
}
static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
@@ -687,13 +687,30 @@ static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr)
{
- u32 offset = (sector_nr << stripe->bg->fs_info->sectorsize_bits);
- const struct page *page = stripe->pages[offset >> PAGE_SHIFT];
+ struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
+ u32 offset = (sector_nr << fs_info->sectorsize_bits);
+ const struct folio *folio = stripe->folios[offset >> min_folio_shift];
- /* stripe->pages[] is allocated by us and no highmem is allowed. */
- ASSERT(page);
- ASSERT(!PageHighMem(page));
- return page_address(page) + offset_in_page(offset);
+ /* stripe->folios[] is allocated by us and no highmem is allowed. */
+ ASSERT(folio);
+ ASSERT(!folio_test_partial_kmap(folio));
+ return folio_address(folio) + offset_in_folio(folio, offset);
+}
+
+static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int sector_nr)
+{
+ struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
+ u32 offset = (sector_nr << fs_info->sectorsize_bits);
+ const struct folio *folio = stripe->folios[offset >> min_folio_shift];
+
+ /* stripe->folios[] is allocated by us and no highmem is allowed. */
+ ASSERT(folio);
+ ASSERT(!folio_test_partial_kmap(folio));
+ /* And the range must be contained inside the folio. */
+ ASSERT(offset_in_folio(folio, offset) + fs_info->sectorsize <= folio_size(folio));
+ return page_to_phys(folio_page(folio, 0)) + offset_in_folio(folio, offset);
}
static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr)
@@ -788,7 +805,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct scrub_sector_verification *sector = &stripe->sectors[sector_nr];
const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits;
- void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
+ phys_addr_t paddr = scrub_stripe_get_paddr(stripe, sector_nr);
u8 csum_buf[BTRFS_CSUM_SIZE];
int ret;
@@ -833,7 +850,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
return;
}
- ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, sector->csum);
+ ret = btrfs_check_block_csum(fs_info, paddr, csum_buf, sector->csum);
if (ret < 0) {
scrub_bitmap_set_bit_csum_error(stripe, sector_nr);
scrub_bitmap_set_bit_error(stripe, sector_nr);
@@ -1369,8 +1386,7 @@ static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *d
* Slice is divided into intervals when the IO is submitted, adjust by
* bwlimit and maximum of 64 intervals.
*/
- div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
- div = min_t(u32, 64, div);
+ div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
/* Start new epoch, set deadline */
now = ktime_get();
@@ -1513,7 +1529,7 @@ static int find_first_extent_item(struct btrfs_root *extent_root,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
@@ -1859,6 +1875,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_bio *bbio;
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits;
int mirror = stripe->mirror_num;
@@ -1871,7 +1888,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
return;
}
- bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
+ bbio = btrfs_bio_alloc(BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ, fs_info,
scrub_read_endio, stripe);
bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
@@ -1970,7 +1987,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
* metadata, we should immediately abort.
*/
for (int i = 0; i < nr_stripes; i++) {
- if (stripe_has_metadata_error(&sctx->stripes[i])) {
+ if (unlikely(stripe_has_metadata_error(&sctx->stripes[i]))) {
ret = -EIO;
goto out;
}
@@ -2164,7 +2181,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
* As we may hit an empty data stripe while it's missing.
*/
bitmap_and(&error, &error, &has_extent, stripe->nr_sectors);
- if (!bitmap_empty(&error, stripe->nr_sectors)) {
+ if (unlikely(!bitmap_empty(&error, stripe->nr_sectors))) {
btrfs_err(fs_info,
"scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
full_stripe_start, i, stripe->nr_sectors,
@@ -2202,7 +2219,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];
- raid56_parity_cache_data_pages(rbio, stripe->pages,
+ raid56_parity_cache_data_folios(rbio, stripe->folios,
full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
}
raid56_parity_submit_scrub_rbio(rbio);
@@ -2586,7 +2603,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev, u64 start, u64 end)
{
struct btrfs_dev_extent *dev_extent = NULL;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_root *root = fs_info->dev_root;
u64 chunk_offset;
@@ -2858,8 +2875,8 @@ skip_unfreeze:
btrfs_put_block_group(cache);
if (ret)
break;
- if (sctx->is_dev_replace &&
- atomic64_read(&dev_replace->num_write_errors) > 0) {
+ if (unlikely(sctx->is_dev_replace &&
+ atomic64_read(&dev_replace->num_write_errors) > 0)) {
ret = -EIO;
break;
}
@@ -2872,8 +2889,6 @@ skip:
btrfs_release_path(path);
}
- btrfs_free_path(path);
-
return ret;
}
@@ -2889,13 +2904,13 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev,
if (ret < 0)
return ret;
ret = btrfs_check_super_csum(fs_info, sb);
- if (ret != 0) {
+ if (unlikely(ret != 0)) {
btrfs_err_rl(fs_info,
"scrub: super block at physical %llu devid %llu has bad csum",
physical, dev->devid);
return -EIO;
}
- if (btrfs_super_generation(sb) != generation) {
+ if (unlikely(btrfs_super_generation(sb) != generation)) {
btrfs_err_rl(fs_info,
"scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu",
physical, dev->devid,
@@ -3013,7 +3028,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info)
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
- int readonly, int is_dev_replace)
+ bool readonly, bool is_dev_replace)
{
struct btrfs_dev_lookup_args args = { .devid = devid };
struct scrub_ctx *sctx;
@@ -3065,8 +3080,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
}
mutex_lock(&fs_info->scrub_lock);
- if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
- test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
+ if (unlikely(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+ test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state))) {
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
ret = -EIO;
diff --git a/fs/btrfs/scrub.h b/fs/btrfs/scrub.h
index f0df597b75c7..aa68b6ebaf55 100644
--- a/fs/btrfs/scrub.h
+++ b/fs/btrfs/scrub.h
@@ -11,7 +11,7 @@ struct btrfs_scrub_progress;
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
- int readonly, int is_dev_replace);
+ bool readonly, bool is_dev_replace);
void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 7664025a5af4..9230e5066fc6 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -646,7 +646,7 @@ static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
ret = kernel_write(filp, buf + pos, len - pos, off);
if (ret < 0)
return ret;
- if (ret == 0)
+ if (unlikely(ret == 0))
return -EIO;
pos += ret;
}
@@ -909,7 +909,7 @@ static int get_inode_info(struct btrfs_root *root, u64 ino,
struct btrfs_inode_info *info)
{
int ret;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_inode_item *ii;
struct btrfs_key key;
@@ -924,11 +924,11 @@ static int get_inode_info(struct btrfs_root *root, u64 ino,
if (ret) {
if (ret > 0)
ret = -ENOENT;
- goto out;
+ return ret;
}
if (!info)
- goto out;
+ return 0;
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
@@ -945,9 +945,7 @@ static int get_inode_info(struct btrfs_root *root, u64 ino,
*/
info->fileattr = btrfs_inode_flags(path->nodes[0], ii);
-out:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
@@ -973,13 +971,13 @@ typedef int (*iterate_inode_ref_t)(u64 dir, struct fs_path *p, void *ctx);
* path must point to the INODE_REF or INODE_EXTREF when called.
*/
static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *found_key, int resolve,
+ struct btrfs_key *found_key, bool resolve,
iterate_inode_ref_t iterate, void *ctx)
{
struct extent_buffer *eb = path->nodes[0];
struct btrfs_inode_ref *iref;
struct btrfs_inode_extref *extref;
- struct btrfs_path *tmp_path;
+ BTRFS_PATH_AUTO_FREE(tmp_path);
struct fs_path *p;
u32 cur = 0;
u32 total;
@@ -1076,7 +1074,6 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
}
out:
- btrfs_free_path(tmp_path);
fs_path_free(p);
return ret;
}
@@ -1224,7 +1221,7 @@ static int get_inode_path(struct btrfs_root *root,
{
int ret;
struct btrfs_key key, found_key;
- struct btrfs_path *p;
+ BTRFS_PATH_AUTO_FREE(p);
p = alloc_path_for_send();
if (!p)
@@ -1238,28 +1235,20 @@ static int get_inode_path(struct btrfs_root *root,
ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
if (ret < 0)
- goto out;
- if (ret) {
- ret = 1;
- goto out;
- }
+ return ret;
+ if (ret)
+ return 1;
+
btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
if (found_key.objectid != ino ||
(found_key.type != BTRFS_INODE_REF_KEY &&
- found_key.type != BTRFS_INODE_EXTREF_KEY)) {
- ret = -ENOENT;
- goto out;
- }
+ found_key.type != BTRFS_INODE_EXTREF_KEY))
+ return -ENOENT;
- ret = iterate_inode_ref(root, p, &found_key, 1,
- __copy_first_ref, path);
+ ret = iterate_inode_ref(root, p, &found_key, true, __copy_first_ref, path);
if (ret < 0)
- goto out;
- ret = 0;
-
-out:
- btrfs_free_path(p);
- return ret;
+ return ret;
+ return 0;
}
struct backref_ctx {
@@ -1389,7 +1378,7 @@ static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
struct backref_ctx *bctx = ctx;
struct send_ctx *sctx = bctx->sctx;
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
- const u64 key = leaf_bytenr >> fs_info->sectorsize_bits;
+ const u64 key = leaf_bytenr >> fs_info->nodesize_bits;
struct btrfs_lru_cache_entry *raw_entry;
struct backref_cache_entry *entry;
@@ -1444,7 +1433,7 @@ static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
if (!new_entry)
return;
- new_entry->entry.key = leaf_bytenr >> fs_info->sectorsize_bits;
+ new_entry->entry.key = leaf_bytenr >> fs_info->nodesize_bits;
new_entry->entry.gen = 0;
new_entry->num_roots = 0;
ULIST_ITER_INIT(&uiter);
@@ -1716,7 +1705,7 @@ static int read_symlink(struct btrfs_root *root,
struct fs_path *dest)
{
int ret;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_file_extent_item *ei;
u8 type;
@@ -1733,21 +1722,20 @@ static int read_symlink(struct btrfs_root *root,
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
- goto out;
- if (ret) {
+ return ret;
+ if (unlikely(ret)) {
/*
* An empty symlink inode. Can happen in rare error paths when
* creating a symlink (transaction committed before the inode
* eviction handler removed the symlink inode items and a crash
- * happened in between or the subvol was snapshoted in between).
+ * happened in between or the subvol was snapshotted in between).
* Print an informative message to dmesg/syslog so that the user
* can delete the symlink.
*/
btrfs_err(root->fs_info,
"Found empty symlink inode %llu at root %llu",
ino, btrfs_root_id(root));
- ret = -EIO;
- goto out;
+ return -EIO;
}
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -1758,7 +1746,7 @@ static int read_symlink(struct btrfs_root *root,
btrfs_crit(root->fs_info,
"send: found symlink extent that is not inline, ino %llu root %llu extent type %d",
ino, btrfs_root_id(root), type);
- goto out;
+ return ret;
}
compression = btrfs_file_extent_compression(path->nodes[0], ei);
if (unlikely(compression != BTRFS_COMPRESS_NONE)) {
@@ -1766,17 +1754,13 @@ static int read_symlink(struct btrfs_root *root,
btrfs_crit(root->fs_info,
"send: found symlink extent with compression, ino %llu root %llu compression type %d",
ino, btrfs_root_id(root), compression);
- goto out;
+ return ret;
}
off = btrfs_file_extent_inline_start(ei);
len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
- ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
-
-out:
- btrfs_free_path(path);
- return ret;
+ return fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
}
/*
@@ -1787,8 +1771,7 @@ static int gen_unique_name(struct send_ctx *sctx,
u64 ino, u64 gen,
struct fs_path *dest)
{
- int ret = 0;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_dir_item *di;
char tmp[64];
int len;
@@ -1811,10 +1794,9 @@ static int gen_unique_name(struct send_ctx *sctx,
path, BTRFS_FIRST_FREE_OBJECTID,
&tmp_name, 0);
btrfs_release_path(path);
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
- }
+ if (IS_ERR(di))
+ return PTR_ERR(di);
+
if (di) {
/* not unique, try again */
idx++;
@@ -1823,7 +1805,6 @@ static int gen_unique_name(struct send_ctx *sctx,
if (!sctx->parent_root) {
/* unique */
- ret = 0;
break;
}
@@ -1831,10 +1812,9 @@ static int gen_unique_name(struct send_ctx *sctx,
path, BTRFS_FIRST_FREE_OBJECTID,
&tmp_name, 0);
btrfs_release_path(path);
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
- }
+ if (IS_ERR(di))
+ return PTR_ERR(di);
+
if (di) {
/* not unique, try again */
idx++;
@@ -1844,11 +1824,7 @@ static int gen_unique_name(struct send_ctx *sctx,
break;
}
- ret = fs_path_add(dest, tmp, len);
-
-out:
- btrfs_free_path(path);
- return ret;
+ return fs_path_add(dest, tmp, len);
}
enum inode_state {
@@ -1960,7 +1936,7 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
int ret = 0;
struct btrfs_dir_item *di;
struct btrfs_key key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);
path = alloc_path_for_send();
@@ -1968,19 +1944,15 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
return -ENOMEM;
di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
- if (IS_ERR_OR_NULL(di)) {
- ret = di ? PTR_ERR(di) : -ENOENT;
- goto out;
- }
+ if (IS_ERR_OR_NULL(di))
+ return di ? PTR_ERR(di) : -ENOENT;
+
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
- if (key.type == BTRFS_ROOT_ITEM_KEY) {
- ret = -ENOENT;
- goto out;
- }
+ if (key.type == BTRFS_ROOT_ITEM_KEY)
+ return -ENOENT;
+
*found_inode = key.objectid;
-out:
- btrfs_free_path(path);
return ret;
}
@@ -1994,7 +1966,7 @@ static int get_first_ref(struct btrfs_root *root, u64 ino,
int ret;
struct btrfs_key key;
struct btrfs_key found_key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int len;
u64 parent_dir;
@@ -2008,16 +1980,14 @@ static int get_first_ref(struct btrfs_root *root, u64 ino,
ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
if (ret < 0)
- goto out;
+ return ret;
if (!ret)
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
if (ret || found_key.objectid != ino ||
(found_key.type != BTRFS_INODE_REF_KEY &&
- found_key.type != BTRFS_INODE_EXTREF_KEY)) {
- ret = -ENOENT;
- goto out;
- }
+ found_key.type != BTRFS_INODE_EXTREF_KEY))
+ return -ENOENT;
if (found_key.type == BTRFS_INODE_REF_KEY) {
struct btrfs_inode_ref *iref;
@@ -2038,19 +2008,17 @@ static int get_first_ref(struct btrfs_root *root, u64 ino,
parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
}
if (ret < 0)
- goto out;
+ return ret;
btrfs_release_path(path);
if (dir_gen) {
ret = get_inode_gen(root, parent_dir, dir_gen);
if (ret < 0)
- goto out;
+ return ret;
}
*dir = parent_dir;
-out:
- btrfs_free_path(path);
return ret;
}
@@ -2486,7 +2454,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
int ret;
struct btrfs_root *send_root = sctx->send_root;
struct btrfs_root *parent_root = sctx->parent_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
@@ -2498,10 +2466,8 @@ static int send_subvol_begin(struct send_ctx *sctx)
return -ENOMEM;
name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
- if (!name) {
- btrfs_free_path(path);
+ if (!name)
return -ENOMEM;
- }
key.objectid = btrfs_root_id(send_root);
key.type = BTRFS_ROOT_BACKREF_KEY;
@@ -2564,7 +2530,6 @@ static int send_subvol_begin(struct send_ctx *sctx)
tlv_put_failure:
out:
- btrfs_free_path(path);
kfree(name);
return ret;
}
@@ -2715,7 +2680,7 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
int ret = 0;
struct fs_path *p = NULL;
struct btrfs_inode_item *ii;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *eb;
struct btrfs_key key;
int slot;
@@ -2759,7 +2724,6 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
tlv_put_failure:
out:
free_path_for_command(sctx, p);
- btrfs_free_path(path);
return ret;
}
@@ -2769,7 +2733,7 @@ out:
* processing an inode that is a directory and it just got renamed, and existing
* entries in the cache may refer to inodes that have the directory in their
* full path - in which case we would generate outdated paths (pre-rename)
- * for the inodes that the cache entries point to. Instead of prunning the
+ * for the inodes that the cache entries point to. Instead of pruning the
* cache when inserting, do it after we finish processing each inode at
* finish_inode_if_needed().
*/
@@ -2930,7 +2894,7 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
{
int ret = 0;
int iter_ret = 0;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_key di_key;
@@ -2970,7 +2934,6 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
if (iter_ret < 0)
ret = iter_ret;
- btrfs_free_path(path);
return ret;
}
@@ -3750,7 +3713,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
struct recorded_ref *parent_ref,
const bool is_orphan)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key di_key;
struct btrfs_dir_item *di;
@@ -3771,19 +3734,15 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
- if (ret < 0) {
- goto out;
- } else if (ret > 0) {
- ret = 0;
- goto out;
- }
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return 0;
di = btrfs_match_dir_item_name(path, parent_ref->name,
parent_ref->name_len);
- if (!di) {
- ret = 0;
- goto out;
- }
+ if (!di)
+ return 0;
/*
* di_key.objectid has the number of the inode that has a dentry in the
* parent directory with the same name that sctx->cur_ino is being
@@ -3793,26 +3752,22 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
* that it happens after that other inode is renamed.
*/
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
- if (di_key.type != BTRFS_INODE_ITEM_KEY) {
- ret = 0;
- goto out;
- }
+ if (di_key.type != BTRFS_INODE_ITEM_KEY)
+ return 0;
ret = get_inode_gen(sctx->parent_root, di_key.objectid, &left_gen);
if (ret < 0)
- goto out;
+ return ret;
ret = get_inode_gen(sctx->send_root, di_key.objectid, &right_gen);
if (ret < 0) {
if (ret == -ENOENT)
ret = 0;
- goto out;
+ return ret;
}
/* Different inode, no need to delay the rename of sctx->cur_ino */
- if (right_gen != left_gen) {
- ret = 0;
- goto out;
- }
+ if (right_gen != left_gen)
+ return 0;
wdm = get_waiting_dir_move(sctx, di_key.objectid);
if (wdm && !wdm->orphanized) {
@@ -3826,8 +3781,6 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
if (!ret)
ret = 1;
}
-out:
- btrfs_free_path(path);
return ret;
}
@@ -3877,7 +3830,7 @@ static int is_ancestor(struct btrfs_root *root,
bool free_fs_path = false;
int ret = 0;
int iter_ret = 0;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
if (!fs_path) {
@@ -3945,7 +3898,6 @@ static int is_ancestor(struct btrfs_root *root,
ret = iter_ret;
out:
- btrfs_free_path(path);
if (free_fs_path)
fs_path_free(fs_path);
return ret;
@@ -4756,8 +4708,8 @@ static int record_new_ref(struct send_ctx *sctx)
{
int ret;
- ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
- sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
+ ret = iterate_inode_ref(sctx->send_root, sctx->left_path, sctx->cmp_key,
+ false, record_new_ref_if_needed, sctx);
if (ret < 0)
return ret;
@@ -4768,9 +4720,8 @@ static int record_deleted_ref(struct send_ctx *sctx)
{
int ret;
- ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
- sctx->cmp_key, 0, record_deleted_ref_if_needed,
- sctx);
+ ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, sctx->cmp_key,
+ false, record_deleted_ref_if_needed, sctx);
if (ret < 0)
return ret;
@@ -4781,12 +4732,12 @@ static int record_changed_ref(struct send_ctx *sctx)
{
int ret;
- ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
- sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
+ ret = iterate_inode_ref(sctx->send_root, sctx->left_path, sctx->cmp_key,
+ false, record_new_ref_if_needed, sctx);
if (ret < 0)
return ret;
- ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
- sctx->cmp_key, 0, record_deleted_ref_if_needed, sctx);
+ ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, sctx->cmp_key,
+ false, record_deleted_ref_if_needed, sctx);
if (ret < 0)
return ret;
@@ -4803,7 +4754,7 @@ static int process_all_refs(struct send_ctx *sctx,
int ret = 0;
int iter_ret = 0;
struct btrfs_root *root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key found_key;
iterate_inode_ref_t cb;
@@ -4822,8 +4773,7 @@ static int process_all_refs(struct send_ctx *sctx,
} else {
btrfs_err(sctx->send_root->fs_info,
"Wrong command %d in process_all_refs", cmd);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
key.objectid = sctx->cmp_key->objectid;
@@ -4835,15 +4785,14 @@ static int process_all_refs(struct send_ctx *sctx,
found_key.type != BTRFS_INODE_EXTREF_KEY))
break;
- ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
+ ret = iterate_inode_ref(root, path, &found_key, false, cb, sctx);
if (ret < 0)
- goto out;
+ return ret;
}
/* Catch error found during iteration */
- if (iter_ret < 0) {
- ret = iter_ret;
- goto out;
- }
+ if (iter_ret < 0)
+ return iter_ret;
+
btrfs_release_path(path);
/*
@@ -4851,10 +4800,7 @@ static int process_all_refs(struct send_ctx *sctx,
* re-creating this inode and will be rename'ing it into place once we
* rename the parent directory.
*/
- ret = process_recorded_refs(sctx, &pending_move);
-out:
- btrfs_free_path(path);
- return ret;
+ return process_recorded_refs(sctx, &pending_move);
}
static int send_set_xattr(struct send_ctx *sctx,
@@ -5080,7 +5026,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
int ret = 0;
int iter_ret = 0;
struct btrfs_root *root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key found_key;
@@ -5108,7 +5054,6 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
if (iter_ret < 0)
ret = iter_ret;
- btrfs_free_path(path);
return ret;
}
@@ -5254,7 +5199,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
if (!folio_test_uptodate(folio)) {
btrfs_read_folio(NULL, folio);
folio_lock(folio);
- if (!folio_test_uptodate(folio)) {
+ if (unlikely(!folio_test_uptodate(folio))) {
folio_unlock(folio);
btrfs_err(fs_info,
"send: IO error at offset %llu for inode %llu root %llu",
@@ -5656,7 +5601,14 @@ static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
+ /*
+ * Do not go through encoded read for bs > ps cases.
+ *
+ * Encoded send is using vmallocated pages as buffer, which we can
+ * not ensure every folio is large enough to contain a block.
+ */
+ if (sctx->send_root->fs_info->sectorsize <= PAGE_SIZE &&
+ (sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
bool is_inline = (btrfs_file_extent_type(leaf, ei) ==
BTRFS_FILE_EXTENT_INLINE);
@@ -5766,7 +5718,7 @@ static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
*/
static int send_capabilities(struct send_ctx *sctx)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_dir_item *di;
struct extent_buffer *leaf;
unsigned long data_ptr;
@@ -5804,7 +5756,6 @@ static int send_capabilities(struct send_ctx *sctx)
strlen(XATTR_NAME_CAPS), buf, buf_len);
out:
kfree(buf);
- btrfs_free_path(path);
return ret;
}
@@ -5812,7 +5763,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
struct clone_root *clone_root, const u64 disk_byte,
u64 data_offset, u64 offset, u64 len)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
int ret;
struct btrfs_inode_info info;
@@ -5848,7 +5799,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
ret = get_inode_info(clone_root->root, clone_root->ino, &info);
btrfs_release_path(path);
if (ret < 0)
- goto out;
+ return ret;
clone_src_i_size = info.size;
/*
@@ -5878,7 +5829,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
key.offset = clone_root->offset;
ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
if (ret < 0)
- goto out;
+ return ret;
if (ret > 0 && path->slots[0] > 0) {
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
if (key.objectid == clone_root->ino &&
@@ -5899,7 +5850,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(clone_root->root, path);
if (ret < 0)
- goto out;
+ return ret;
else if (ret > 0)
break;
continue;
@@ -5936,7 +5887,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
ret = send_extent_data(sctx, dst_path, offset,
hole_len);
if (ret < 0)
- goto out;
+ return ret;
len -= hole_len;
if (len == 0)
@@ -6007,7 +5958,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
ret = send_clone(sctx, offset, slen,
clone_root);
if (ret < 0)
- goto out;
+ return ret;
}
ret = send_extent_data(sctx, dst_path,
offset + slen,
@@ -6041,7 +5992,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
}
if (ret < 0)
- goto out;
+ return ret;
len -= clone_len;
if (len == 0)
@@ -6072,8 +6023,6 @@ next:
ret = send_extent_data(sctx, dst_path, offset, len);
else
ret = 0;
-out:
- btrfs_free_path(path);
return ret;
}
@@ -6162,7 +6111,7 @@ static int is_extent_unchanged(struct send_ctx *sctx,
{
int ret = 0;
struct btrfs_key key;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *eb;
int slot;
struct btrfs_key found_key;
@@ -6188,10 +6137,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
left_type = btrfs_file_extent_type(eb, ei);
- if (left_type != BTRFS_FILE_EXTENT_REG) {
- ret = 0;
- goto out;
- }
+ if (left_type != BTRFS_FILE_EXTENT_REG)
+ return 0;
+
left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
left_len = btrfs_file_extent_num_bytes(eb, ei);
left_offset = btrfs_file_extent_offset(eb, ei);
@@ -6223,11 +6171,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
key.offset = ekey->offset;
ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
if (ret < 0)
- goto out;
- if (ret) {
- ret = 0;
- goto out;
- }
+ return ret;
+ if (ret)
+ return 0;
/*
* Handle special case where the right side has no extents at all.
@@ -6236,11 +6182,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
slot = path->slots[0];
btrfs_item_key_to_cpu(eb, &found_key, slot);
if (found_key.objectid != key.objectid ||
- found_key.type != key.type) {
+ found_key.type != key.type)
/* If we're a hole then just pretend nothing changed */
- ret = (left_disknr) ? 0 : 1;
- goto out;
- }
+ return (left_disknr ? 0 : 1);
/*
* We're now on 2a, 2b or 7.
@@ -6250,10 +6194,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
right_type = btrfs_file_extent_type(eb, ei);
if (right_type != BTRFS_FILE_EXTENT_REG &&
- right_type != BTRFS_FILE_EXTENT_INLINE) {
- ret = 0;
- goto out;
- }
+ right_type != BTRFS_FILE_EXTENT_INLINE)
+ return 0;
if (right_type == BTRFS_FILE_EXTENT_INLINE) {
right_len = btrfs_file_extent_ram_bytes(eb, ei);
@@ -6266,11 +6208,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
* Are we at extent 8? If yes, we know the extent is changed.
* This may only happen on the first iteration.
*/
- if (found_key.offset + right_len <= ekey->offset) {
+ if (found_key.offset + right_len <= ekey->offset)
/* If we're a hole just pretend nothing changed */
- ret = (left_disknr) ? 0 : 1;
- goto out;
- }
+ return (left_disknr ? 0 : 1);
/*
* We just wanted to see if when we have an inline extent, what
@@ -6280,10 +6220,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
* compressed extent representing data with a size matching
* the page size (currently the same as sector size).
*/
- if (right_type == BTRFS_FILE_EXTENT_INLINE) {
- ret = 0;
- goto out;
- }
+ if (right_type == BTRFS_FILE_EXTENT_INLINE)
+ return 0;
right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
right_offset = btrfs_file_extent_offset(eb, ei);
@@ -6303,17 +6241,15 @@ static int is_extent_unchanged(struct send_ctx *sctx,
*/
if (left_disknr != right_disknr ||
left_offset_fixed != right_offset ||
- left_gen != right_gen) {
- ret = 0;
- goto out;
- }
+ left_gen != right_gen)
+ return 0;
/*
* Go to the next extent.
*/
ret = btrfs_next_item(sctx->parent_root, path);
if (ret < 0)
- goto out;
+ return ret;
if (!ret) {
eb = path->nodes[0];
slot = path->slots[0];
@@ -6324,10 +6260,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
key.offset += right_len;
break;
}
- if (found_key.offset != key.offset + right_len) {
- ret = 0;
- goto out;
- }
+ if (found_key.offset != key.offset + right_len)
+ return 0;
+
key = found_key;
}
@@ -6340,15 +6275,12 @@ static int is_extent_unchanged(struct send_ctx *sctx,
else
ret = 0;
-
-out:
- btrfs_free_path(path);
return ret;
}
static int get_last_extent(struct send_ctx *sctx, u64 offset)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root *root = sctx->send_root;
struct btrfs_key key;
int ret;
@@ -6364,15 +6296,13 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset)
key.offset = offset;
ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
if (ret < 0)
- goto out;
+ return ret;
ret = 0;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
- goto out;
+ return ret;
sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
-out:
- btrfs_free_path(path);
return ret;
}
@@ -6380,7 +6310,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
const u64 start,
const u64 end)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_root *root = sctx->parent_root;
u64 search_start = start;
@@ -6395,7 +6325,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
key.offset = search_start;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
- goto out;
+ return ret;
if (ret > 0 && path->slots[0] > 0)
path->slots[0]--;
@@ -6408,8 +6338,8 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out;
- else if (ret > 0)
+ return ret;
+ if (ret > 0)
break;
continue;
}
@@ -6431,15 +6361,11 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
search_start = extent_end;
goto next;
}
- ret = 0;
- goto out;
+ return 0;
next:
path->slots[0]++;
}
- ret = 1;
-out:
- btrfs_free_path(path);
- return ret;
+ return 1;
}
static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
@@ -6547,7 +6473,7 @@ static int process_all_extents(struct send_ctx *sctx)
int ret = 0;
int iter_ret = 0;
struct btrfs_root *root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key found_key;
@@ -6574,11 +6500,10 @@ static int process_all_extents(struct send_ctx *sctx)
if (iter_ret < 0)
ret = iter_ret;
- btrfs_free_path(path);
return ret;
}
-static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
+static int process_recorded_refs_if_needed(struct send_ctx *sctx, bool at_end,
int *pending_move,
int *refs_processed)
{
@@ -6601,7 +6526,7 @@ out:
return ret;
}
-static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
+static int finish_inode_if_needed(struct send_ctx *sctx, bool at_end)
{
int ret = 0;
struct btrfs_inode_info info;
@@ -7036,7 +6961,7 @@ static int changed_ref(struct send_ctx *sctx,
{
int ret = 0;
- if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ if (unlikely(sctx->cur_ino != sctx->cmp_key->objectid)) {
inconsistent_snapshot_error(sctx, result, "reference");
return -EIO;
}
@@ -7064,7 +6989,7 @@ static int changed_xattr(struct send_ctx *sctx,
{
int ret = 0;
- if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ if (unlikely(sctx->cur_ino != sctx->cmp_key->objectid)) {
inconsistent_snapshot_error(sctx, result, "xattr");
return -EIO;
}
@@ -7304,7 +7229,7 @@ static int search_key_again(const struct send_ctx *sctx,
*/
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
ASSERT(ret <= 0);
- if (ret > 0) {
+ if (unlikely(ret > 0)) {
btrfs_print_tree(path->nodes[path->lowest_level], false);
btrfs_err(root->fs_info,
"send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d",
@@ -7324,7 +7249,7 @@ static int full_send_tree(struct send_ctx *sctx)
struct btrfs_root *send_root = sctx->send_root;
struct btrfs_key key;
struct btrfs_fs_info *fs_info = send_root->fs_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
path = alloc_path_for_send();
if (!path)
@@ -7341,7 +7266,7 @@ static int full_send_tree(struct send_ctx *sctx)
ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
if (ret < 0)
- goto out;
+ return ret;
if (ret)
goto out_finish;
@@ -7351,7 +7276,7 @@ static int full_send_tree(struct send_ctx *sctx)
ret = changed_cb(path, NULL, &key,
BTRFS_COMPARE_TREE_NEW, sctx);
if (ret < 0)
- goto out;
+ return ret;
down_read(&fs_info->commit_root_sem);
if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
@@ -7370,14 +7295,14 @@ static int full_send_tree(struct send_ctx *sctx)
btrfs_release_path(path);
ret = search_key_again(sctx, send_root, path, &key);
if (ret < 0)
- goto out;
+ return ret;
} else {
up_read(&fs_info->commit_root_sem);
}
ret = btrfs_next_item(send_root, path);
if (ret < 0)
- goto out;
+ return ret;
if (ret) {
ret = 0;
break;
@@ -7385,11 +7310,7 @@ static int full_send_tree(struct send_ctx *sctx)
}
out_finish:
- ret = finish_inode_if_needed(sctx, 1);
-
-out:
- btrfs_free_path(path);
- return ret;
+ return finish_inode_if_needed(sctx, 1);
}
static int replace_node_with_clone(struct btrfs_path *path, int level)
@@ -7644,8 +7565,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
struct btrfs_fs_info *fs_info = left_root->fs_info;
int ret;
int cmp;
- struct btrfs_path *left_path = NULL;
- struct btrfs_path *right_path = NULL;
+ BTRFS_PATH_AUTO_FREE(left_path);
+ BTRFS_PATH_AUTO_FREE(right_path);
struct btrfs_key left_key;
struct btrfs_key right_key;
char *tmp_buf = NULL;
@@ -7918,8 +7839,6 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
out_unlock:
up_read(&fs_info->commit_root_sem);
out:
- btrfs_free_path(left_path);
- btrfs_free_path(right_path);
kvfree(tmp_buf);
return ret;
}
@@ -7986,7 +7905,7 @@ static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
}
/*
- * Make sure any existing dellaloc is flushed for any root used by a send
+ * Make sure any existing delalloc is flushed for any root used by a send
* operation so that we do not miss any data and we do not race with writeback
* finishing and changing a tree while send is using the tree. This could
* happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 0481c693ac2e..97452fb5d29b 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -479,7 +479,7 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
/*
* On the zoned mode, we always allocate one zone as one chunk.
- * Returning non-zone size alingned bytes here will result in
+ * Returning non-zone size aligned bytes here will result in
* less pressure for the async metadata reclaim process, and it
* will over-commit too much leading to ENOSPC. Align down to the
* zone size to avoid that.
@@ -1528,7 +1528,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
* turned into error mode due to a transaction abort when flushing space
* above, in that case fail with the abort error instead of returning
* success to the caller if we can steal from the global rsv - this is
- * just to have caller fail immeditelly instead of later when trying to
+ * just to have caller fail immediately instead of later when trying to
* modify the fs, making it easier to debug -ENOSPC problems.
*/
if (BTRFS_FS_ERROR(fs_info)) {
@@ -1830,7 +1830,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
space_info->flags,
orig_bytes, flush,
"enospc");
- queue_work(system_unbound_wq, async_work);
+ queue_work(system_dfl_wq, async_work);
}
} else {
list_add_tail(&ticket.list,
@@ -1847,7 +1847,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
need_preemptive_reclaim(fs_info, space_info)) {
trace_btrfs_trigger_flush(fs_info, space_info->flags,
orig_bytes, flush, "preempt");
- queue_work(system_unbound_wq,
+ queue_work(system_dfl_wq,
&fs_info->preempt_reclaim_work);
}
}
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index cb4f97833dc3..5ca8d4db6722 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -690,7 +690,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
\
GET_SUBPAGE_BITMAP(fs_info, folio, name, &bitmap); \
btrfs_warn(fs_info, \
- "dumpping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
+ "dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
start, len, folio_pos(folio), \
blocks_per_folio, &bitmap); \
}
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index ee0710eb13fd..ad0552db7c7d 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -13,7 +13,7 @@ struct address_space;
struct folio;
/*
- * Extra info for subpapge bitmap.
+ * Extra info for subpage bitmap.
*
* For subpage we pack all uptodate/dirty/writeback/ordered bitmaps into
* one larger bitmap.
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a262b494a89f..d6e496436539 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -133,9 +133,8 @@ enum {
Opt_enospc_debug,
#ifdef CONFIG_BTRFS_DEBUG
Opt_fragment, Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
-#endif
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
Opt_ref_verify,
+ Opt_ref_tracker,
#endif
Opt_err,
};
@@ -257,8 +256,7 @@ static const struct fs_parameter_spec btrfs_fs_parameters[] = {
fsparam_flag_no("enospc_debug", Opt_enospc_debug),
#ifdef CONFIG_BTRFS_DEBUG
fsparam_enum("fragment", Opt_fragment, btrfs_parameter_fragment),
-#endif
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+ fsparam_flag("ref_tracker", Opt_ref_tracker),
fsparam_flag("ref_verify", Opt_ref_verify),
#endif
{}
@@ -276,6 +274,7 @@ static int btrfs_parse_compress(struct btrfs_fs_context *ctx,
const struct fs_parameter *param, int opt)
{
const char *string = param->string;
+ int ret;
/*
* Provide the same semantics as older kernels that don't use fs
@@ -294,21 +293,30 @@ static int btrfs_parse_compress(struct btrfs_fs_context *ctx,
btrfs_clear_opt(ctx->mount_opt, NODATASUM);
} else if (btrfs_match_compress_type(string, "zlib", true)) {
ctx->compress_type = BTRFS_COMPRESS_ZLIB;
- ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_ZLIB,
- string + 4);
+ ret = btrfs_compress_str2level(BTRFS_COMPRESS_ZLIB, string + 4,
+ &ctx->compress_level);
+ if (ret < 0)
+ goto error;
btrfs_set_opt(ctx->mount_opt, COMPRESS);
btrfs_clear_opt(ctx->mount_opt, NODATACOW);
btrfs_clear_opt(ctx->mount_opt, NODATASUM);
- } else if (btrfs_match_compress_type(string, "lzo", false)) {
+ } else if (btrfs_match_compress_type(string, "lzo", true)) {
ctx->compress_type = BTRFS_COMPRESS_LZO;
- ctx->compress_level = 0;
+ ret = btrfs_compress_str2level(BTRFS_COMPRESS_LZO, string + 3,
+ &ctx->compress_level);
+ if (ret < 0)
+ goto error;
+ if (string[3] == ':' && string[4])
+ btrfs_warn(NULL, "Compression level ignored for LZO");
btrfs_set_opt(ctx->mount_opt, COMPRESS);
btrfs_clear_opt(ctx->mount_opt, NODATACOW);
btrfs_clear_opt(ctx->mount_opt, NODATASUM);
} else if (btrfs_match_compress_type(string, "zstd", true)) {
ctx->compress_type = BTRFS_COMPRESS_ZSTD;
- ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_ZSTD,
- string + 4);
+ ret = btrfs_compress_str2level(BTRFS_COMPRESS_ZSTD, string + 4,
+ &ctx->compress_level);
+ if (ret < 0)
+ goto error;
btrfs_set_opt(ctx->mount_opt, COMPRESS);
btrfs_clear_opt(ctx->mount_opt, NODATACOW);
btrfs_clear_opt(ctx->mount_opt, NODATASUM);
@@ -319,10 +327,14 @@ static int btrfs_parse_compress(struct btrfs_fs_context *ctx,
btrfs_clear_opt(ctx->mount_opt, COMPRESS);
btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
} else {
- btrfs_err(NULL, "unrecognized compression value %s", string);
- return -EINVAL;
+ ret = -EINVAL;
+ goto error;
}
return 0;
+error:
+ btrfs_err(NULL, "failed to parse compression option '%s'", string);
+ return ret;
+
}
static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
@@ -632,11 +644,12 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
return -EINVAL;
}
break;
-#endif
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
case Opt_ref_verify:
btrfs_set_opt(ctx->mount_opt, REF_VERIFY);
break;
+ case Opt_ref_tracker:
+ btrfs_set_opt(ctx->mount_opt, REF_TRACKER);
+ break;
#endif
default:
btrfs_err(NULL, "unrecognized mount option '%s'", param->key);
@@ -912,7 +925,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
{
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_dir_item *di;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key location;
struct fscrypt_str name = FSTR_INIT("default", 7);
u64 dir_id;
@@ -929,7 +942,6 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
dir_id = btrfs_super_root_dir(fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0);
if (IS_ERR(di)) {
- btrfs_free_path(path);
return PTR_ERR(di);
}
if (!di) {
@@ -938,13 +950,11 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
* it's always been there, but don't freak out, just try and
* mount the top-level subvolume.
*/
- btrfs_free_path(path);
*objectid = BTRFS_FS_TREE_OBJECTID;
return 0;
}
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
- btrfs_free_path(path);
*objectid = location.objectid;
return 0;
}
@@ -1079,7 +1089,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_printf(seq, ",compress-force=%s", compress_type);
else
seq_printf(seq, ",compress=%s", compress_type);
- if (info->compress_level)
+ if (info->compress_level && info->compress_type != BTRFS_COMPRESS_LZO)
seq_printf(seq, ":%d", info->compress_level);
}
if (btrfs_test_opt(info, NOSSD))
@@ -1142,6 +1152,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
#endif
if (btrfs_test_opt(info, REF_VERIFY))
seq_puts(seq, ",ref_verify");
+ if (btrfs_test_opt(info, REF_TRACKER))
+ seq_puts(seq, ",ref_tracker");
seq_printf(seq, ",subvolid=%llu", btrfs_root_id(BTRFS_I(d_inode(dentry))->root));
subvol_name = btrfs_get_subvol_name_from_objectid(info,
btrfs_root_id(BTRFS_I(d_inode(dentry))->root));
@@ -1268,7 +1280,7 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
/*
- * We need to cleanup all defragable inodes if the autodefragment is
+ * We need to cleanup all defraggable inodes if the autodefragment is
* close or the filesystem is read only.
*/
if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
@@ -2260,10 +2272,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
device = btrfs_scan_one_device(vol->name, false);
if (IS_ERR_OR_NULL(device)) {
mutex_unlock(&uuid_mutex);
- if (IS_ERR(device))
- ret = PTR_ERR(device);
- else
- ret = 0;
+ ret = PTR_ERR_OR_ZERO(device);
break;
}
ret = !(device->fs_devices->num_devices ==
@@ -2316,14 +2325,14 @@ static int check_dev_super(struct btrfs_device *dev)
/* Verify the checksum. */
csum_type = btrfs_super_csum_type(sb);
- if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) {
+ if (unlikely(csum_type != btrfs_super_csum_type(fs_info->super_copy))) {
btrfs_err(fs_info, "csum type changed, has %u expect %u",
csum_type, btrfs_super_csum_type(fs_info->super_copy));
ret = -EUCLEAN;
goto out;
}
- if (btrfs_check_super_csum(fs_info, sb)) {
+ if (unlikely(btrfs_check_super_csum(fs_info, sb))) {
btrfs_err(fs_info, "csum for on-disk super block no longer matches");
ret = -EUCLEAN;
goto out;
@@ -2335,7 +2344,7 @@ static int check_dev_super(struct btrfs_device *dev)
goto out;
last_trans = btrfs_get_last_trans_committed(fs_info);
- if (btrfs_super_generation(sb) != last_trans) {
+ if (unlikely(btrfs_super_generation(sb) != last_trans)) {
btrfs_err(fs_info, "transid mismatch, has %llu expect %llu",
btrfs_super_generation(sb), last_trans);
ret = -EUCLEAN;
@@ -2472,9 +2481,6 @@ static int __init btrfs_print_mod_info(void)
#ifdef CONFIG_BTRFS_ASSERT
", assert=on"
#endif
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
- ", ref-verify=on"
-#endif
#ifdef CONFIG_BLK_DEV_ZONED
", zoned=yes"
#else
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 9d398f7a36ad..81f52c1f55ce 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -409,13 +409,17 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
char *buf)
{
ssize_t ret = 0;
+ bool has_output = false;
- if (BTRFS_MIN_BLOCKSIZE != SZ_4K && BTRFS_MIN_BLOCKSIZE != PAGE_SIZE)
- ret += sysfs_emit_at(buf, ret, "%u ", BTRFS_MIN_BLOCKSIZE);
- if (PAGE_SIZE > SZ_4K)
- ret += sysfs_emit_at(buf, ret, "%u ", SZ_4K);
- ret += sysfs_emit_at(buf, ret, "%lu\n", PAGE_SIZE);
-
+ for (u32 cur = BTRFS_MIN_BLOCKSIZE; cur <= BTRFS_MAX_BLOCKSIZE; cur *= 2) {
+ if (!btrfs_supported_blocksize(cur))
+ continue;
+ if (has_output)
+ ret += sysfs_emit_at(buf, ret, " ");
+ ret += sysfs_emit_at(buf, ret, "%u", cur);
+ has_output = true;
+ }
+ ret += sysfs_emit_at(buf, ret, "\n");
return ret;
}
BTRFS_ATTR(static_feature, supported_sectorsizes,
diff --git a/fs/btrfs/tests/delayed-refs-tests.c b/fs/btrfs/tests/delayed-refs-tests.c
index 265370e79a54..e2248acb906b 100644
--- a/fs/btrfs/tests/delayed-refs-tests.c
+++ b/fs/btrfs/tests/delayed-refs-tests.c
@@ -997,12 +997,12 @@ int btrfs_test_delayed_refs(u32 sectorsize, u32 nodesize)
ret = simple_tests(&trans);
if (!ret) {
- test_msg("running delayed refs merg tests on metadata refs");
+ test_msg("running delayed refs merge tests on metadata refs");
ret = merge_tests(&trans, BTRFS_REF_METADATA);
}
if (!ret) {
- test_msg("running delayed refs merg tests on data refs");
+ test_msg("running delayed refs merge tests on data refs");
ret = merge_tests(&trans, BTRFS_REF_DATA);
}
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 3a86534c116f..42af6c737c6e 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -1095,7 +1095,7 @@ int btrfs_test_extent_map(void)
/*
* Test a chunk with 2 data stripes one of which
* intersects the physical address of the super block
- * is correctly recognised.
+ * is correctly recognized.
*/
.raid_type = BTRFS_BLOCK_GROUP_RAID1,
.physical_start = SZ_64M - SZ_4M,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c5c0d9cf1a80..89ae0c7a610a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -103,7 +103,7 @@ static struct kmem_cache *btrfs_trans_handle_cachep;
* | attached to transid N+1. |
* | |
* | To next stage: |
- * | Until all tree blocks are super blocks are |
+ * | Until all tree blocks and super blocks are |
* | written to block devices |
* V |
* Transaction N [[TRANS_STATE_COMPLETED]] V
@@ -404,7 +404,7 @@ loop:
*/
static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- int force)
+ bool force)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
@@ -1569,7 +1569,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
* qgroup counters could end up wrong.
*/
ret = btrfs_run_delayed_refs(trans, U64_MAX);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1641,7 +1641,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *parent_root;
struct btrfs_block_rsv *rsv;
struct btrfs_inode *parent_inode = pending->dir;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_dir_item *dir_item;
struct extent_buffer *tmp;
struct extent_buffer *old;
@@ -1694,10 +1694,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto clear_skip_qgroup;
}
- key.objectid = objectid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
rsv = trans->block_rsv;
trans->block_rsv = &pending->block_rsv;
trans->bytes_reserved = trans->block_rsv->reserved;
@@ -1714,7 +1710,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
* insert the directory item
*/
ret = btrfs_set_inode_index(parent_inode, &index);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1735,7 +1731,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_create_qgroup(trans, objectid);
if (ret && ret != -EEXIST) {
- if (ret != -ENOTCONN || btrfs_qgroup_enabled(fs_info)) {
+ if (unlikely(ret != -ENOTCONN || btrfs_qgroup_enabled(fs_info))) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1748,13 +1744,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
* snapshot
*/
ret = btrfs_run_delayed_items(trans);
- if (ret) { /* Transaction aborted */
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = record_root_in_trans(trans, root, 0);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1789,7 +1785,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
old = btrfs_lock_root_node(root);
ret = btrfs_cow_block(trans, root, old, NULL, 0, &old,
BTRFS_NESTING_COW);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_tree_unlock(old);
free_extent_buffer(old);
btrfs_abort_transaction(trans, ret);
@@ -1800,21 +1796,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
/* clean up in any case */
btrfs_tree_unlock(old);
free_extent_buffer(old);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
/* see comments in should_cow_block() */
set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
- smp_wmb();
+ smp_mb__after_atomic();
btrfs_set_root_node(new_root_item, tmp);
/* record when the snapshot was created in key.offset */
+ key.objectid = objectid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = trans->transid;
ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
btrfs_tree_unlock(tmp);
free_extent_buffer(tmp);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1826,7 +1824,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_root_id(parent_root),
btrfs_ino(parent_inode), index,
&fname.disk_name);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1841,7 +1839,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
ret = btrfs_reloc_post_snapshot(trans, pending);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1864,7 +1862,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_insert_dir_item(trans, &fname.disk_name,
parent_inode, &key, BTRFS_FT_DIR,
index);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1874,14 +1872,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
inode_set_mtime_to_ts(&parent_inode->vfs_inode,
inode_set_ctime_current(&parent_inode->vfs_inode));
ret = btrfs_update_inode_fallback(trans, parent_inode);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_uuid_tree_add(trans, new_root_item->uuid,
BTRFS_UUID_KEY_SUBVOL,
objectid);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1889,7 +1887,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
objectid);
- if (ret && ret != -EEXIST) {
+ if (unlikely(ret && ret != -EEXIST)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1907,7 +1905,6 @@ free_fname:
free_pending:
kfree(new_root_item);
pending->root_item = NULL;
- btrfs_free_path(path);
pending->path = NULL;
return ret;
@@ -2423,7 +2420,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* them.
*
* We needn't worry that this operation will corrupt the snapshots,
- * because all the tree which are snapshoted will be forced to COW
+ * because all the tree which are snapshotted will be forced to COW
* the nodes and leaves.
*/
ret = btrfs_run_delayed_items(trans);
@@ -2657,9 +2654,9 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
if (btrfs_header_backref_rev(root->node) <
BTRFS_MIXED_BACKREF_REV)
- ret = btrfs_drop_snapshot(root, 0, 0);
+ ret = btrfs_drop_snapshot(root, false, false);
else
- ret = btrfs_drop_snapshot(root, 1, 0);
+ ret = btrfs_drop_snapshot(root, true, false);
btrfs_put_root(root);
return (ret < 0) ? 0 : 1;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 0f556f4de3f9..ca30b15ea452 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -183,6 +183,7 @@ static bool check_prev_ino(struct extent_buffer *leaf,
/* Only these key->types needs to be checked */
ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
key->type == BTRFS_INODE_REF_KEY ||
+ key->type == BTRFS_INODE_EXTREF_KEY ||
key->type == BTRFS_DIR_INDEX_KEY ||
key->type == BTRFS_DIR_ITEM_KEY ||
key->type == BTRFS_EXTENT_DATA_KEY);
@@ -1209,7 +1210,7 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
/*
* For legacy root item, the members starting at generation_v2 will be
* all filled with 0.
- * And since we allow geneartion_v2 as 0, it will still pass the check.
+ * And since we allow generation_v2 as 0, it will still pass the check.
*/
read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot),
btrfs_item_size(leaf, slot));
@@ -1756,10 +1757,10 @@ static int check_inode_ref(struct extent_buffer *leaf,
while (ptr < end) {
u16 namelen;
- if (unlikely(ptr + sizeof(iref) > end)) {
+ if (unlikely(ptr + sizeof(*iref) > end)) {
inode_ref_err(leaf, slot,
"inode ref overflow, ptr %lu end %lu inode_ref_size %zu",
- ptr, end, sizeof(iref));
+ ptr, end, sizeof(*iref));
return -EUCLEAN;
}
@@ -1782,6 +1783,39 @@ static int check_inode_ref(struct extent_buffer *leaf,
return 0;
}
+/*
+ * Sanity check a BTRFS_INODE_EXTREF_KEY item.
+ *
+ * The item payload is a sequence of struct btrfs_inode_extref headers, each
+ * immediately followed by its name. Walk every entry and make sure both the
+ * fixed size header and the name it describes fit inside the item.
+ *
+ * Returns 0 if the item is well formed, -EUCLEAN if check_prev_ino() rejects
+ * the key or if any entry would run past the end of the item.
+ */
+static int check_inode_extref(struct extent_buffer *leaf,
+			      struct btrfs_key *key, struct btrfs_key *prev_key,
+			      int slot)
+{
+	unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
+	unsigned long end = ptr + btrfs_item_size(leaf, slot);
+
+	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
+		return -EUCLEAN;
+
+	while (ptr < end) {
+		struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr;
+		u16 namelen;
+
+		/*
+		 * The whole comparison must be inside unlikely(): the hint
+		 * evaluates to 0 or 1, so comparing its result against 'end'
+		 * would never detect a truncated header and name_len below
+		 * would be read from beyond the item.
+		 */
+		if (unlikely(ptr + sizeof(*extref) > end)) {
+			inode_ref_err(leaf, slot,
+			"inode extref overflow, ptr %lu end %lu inode_extref size %zu",
+				      ptr, end, sizeof(*extref));
+			return -EUCLEAN;
+		}
+
+		/* The header is fully inside the item, name_len is safe to read. */
+		namelen = btrfs_inode_extref_name_len(leaf, extref);
+		if (unlikely(ptr + sizeof(*extref) + namelen > end)) {
+			inode_ref_err(leaf, slot,
+				"inode extref overflow, ptr %lu end %lu namelen %u",
+				ptr, end, namelen);
+			return -EUCLEAN;
+		}
+		ptr += sizeof(*extref) + namelen;
+	}
+	return 0;
+}
+
static int check_raid_stripe_extent(const struct extent_buffer *leaf,
const struct btrfs_key *key, int slot)
{
@@ -1893,6 +1927,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
case BTRFS_INODE_REF_KEY:
ret = check_inode_ref(leaf, key, prev_key, slot);
break;
+ case BTRFS_INODE_EXTREF_KEY:
+ ret = check_inode_extref(leaf, key, prev_key, slot);
+ break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
ret = check_block_group_item(leaf, key, slot);
break;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7d5d90845ca9..6aad6b65522b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -27,6 +27,7 @@
#include "file-item.h"
#include "file.h"
#include "orphan.h"
+#include "print-tree.h"
#include "tree-checker.h"
#define MAX_CONFLICT_INODES 10
@@ -101,17 +102,134 @@ enum {
LOG_WALK_REPLAY_ALL,
};
+/*
+ * The walk control struct is used to pass state down the chain when processing
+ * the log tree. The stage field tells us which part of the log tree processing
+ * we are currently doing.
+ */
+struct walk_control {
+ /*
+ * Signal that we are freeing the metadata extents of a log tree.
+ * This is used at transaction commit time while freeing a log tree.
+ */
+ bool free;
+
+ /*
+ * Signal that we are pinning the metadata extents of a log tree and the
+ * data extents its leaves point to (if using mixed block groups).
+ * This happens in the first stage of log replay to ensure that during
+ * replay, while we are modifying subvolume trees, we don't overwrite
+ * the metadata extents of log trees.
+ */
+ bool pin;
+
+ /*
+ * What stage of the replay code we're currently in. Presumably one of
+ * the LOG_WALK_* enum values (LOG_WALK_PIN_ONLY, LOG_WALK_REPLAY_INODES
+ * and LOG_WALK_REPLAY_ALL are the ones referenced in this file).
+ */
+ int stage;
+
+ /*
+ * Ignore any items from the inode currently being processed. Needs
+ * to be set every time we find a BTRFS_INODE_ITEM_KEY.
+ */
+ bool ignore_cur_inode;
+
+ /*
+ * The root we are currently replaying to. This is NULL for the replay
+ * stage LOG_WALK_PIN_ONLY.
+ */
+ struct btrfs_root *root;
+
+ /* The log tree we are currently processing (not NULL for any stage). */
+ struct btrfs_root *log;
+
+ /* The transaction handle used for replaying all log trees. */
+ struct btrfs_trans_handle *trans;
+
+ /*
+ * The function that gets used to process blocks we find in the tree.
+ * Note the extent_buffer might not be up to date when it is passed in,
+ * and it must be checked or read if you need the data inside it.
+ * The callback receives this walk_control itself, so the log root and
+ * transaction handle are taken from it rather than passed separately.
+ */
+ int (*process_func)(struct extent_buffer *eb,
+ struct walk_control *wc, u64 gen, int level);
+
+ /*
+ * The following are used only when stage is >= LOG_WALK_REPLAY_INODES
+ * and by the replay_one_buffer() callback.
+ */
+
+ /* The current log leaf being processed. */
+ struct extent_buffer *log_leaf;
+ /* The key being processed of the current log leaf. */
+ struct btrfs_key log_key;
+ /* The slot being processed of the current log leaf. */
+ int log_slot;
+
+ /* A path used for searches and modifications to subvolume trees. */
+ struct btrfs_path *subvol_path;
+};
+
+/*
+ * Abort the log replay transaction and dump the state needed to debug it.
+ *
+ * @wc:       walk control of the replay in progress; its transaction handle,
+ *            subvolume path, log leaf/slot/key, root and stage are reported.
+ * @function: name of the caller, printed in the final message.
+ * @line:     source line of the caller, printed in the final message.
+ * @error:    error value passed to btrfs_abort_transaction() and printed.
+ * @fmt:      printf style format (plus arguments) appended to the message.
+ *
+ * Only the first call does anything, later calls return immediately.
+ */
+static void do_abort_log_replay(struct walk_control *wc, const char *function,
+				unsigned int line, int error, const char *fmt, ...)
+{
+	struct btrfs_fs_info *fs_info = wc->trans->fs_info;
+	struct extent_buffer *subvol_leaf;
+	struct va_format vaf;
+	va_list ap;
+	u64 root_id;
+
+	/*
+	 * Bail out if a previous call already aborted the replay: dumping the
+	 * leaves a second time is just noise, and the first failure is the
+	 * interesting one. Furthermore, BTRFS_FS_STATE_TRANS_ABORTED can not
+	 * be used for this purpose, because code outside of tree-log.c that is
+	 * called during log replay (btrfs_add_link() for example) can abort
+	 * the transaction, and in that case we still want all the log replay
+	 * specific information below to be printed.
+	 */
+	if (test_and_set_bit(BTRFS_FS_STATE_LOG_REPLAY_ABORTED, &fs_info->fs_state))
+		return;
+
+	btrfs_abort_transaction(wc->trans, error);
+
+	root_id = btrfs_root_id(wc->root);
+
+	/* Subvolume leaf the replay code was looking at, if any. */
+	subvol_leaf = wc->subvol_path->nodes[0];
+	if (subvol_leaf) {
+		btrfs_crit(fs_info,
+			   "subvolume (root %llu) leaf currently being processed:",
+			   root_id);
+		btrfs_print_leaf(subvol_leaf);
+	}
+
+	/* Log tree leaf, together with the slot and key being replayed. */
+	if (wc->log_leaf) {
+		btrfs_crit(fs_info,
+"log tree (for root %llu) leaf currently being processed (slot %d key %llu %u %llu):",
+			   root_id, wc->log_slot,
+			   wc->log_key.objectid, wc->log_key.type, wc->log_key.offset);
+		btrfs_print_leaf(wc->log_leaf);
+	}
+
+	/* Finally the caller supplied message, expanded through %pV. */
+	va_start(ap, fmt);
+	vaf = (struct va_format){ .fmt = fmt, .va = &ap };
+
+	btrfs_crit(fs_info,
+	"log replay failed in %s:%u for root %llu, stage %d, with error %d: %pV",
+		   function, line, root_id, wc->stage, error, &vaf);
+
+	va_end(ap);
+}
+
+/*
+ * Abort a transaction during log replay from anywhere down the call chain of
+ * replay_one_buffer(). Compared to calling btrfs_abort_transaction() directly,
+ * this also reports the caller location and the replay state, which makes
+ * debugging a lot easier.
+ */
+#define btrfs_abort_log_replay(wc, error, fmt, args...) \
+	do_abort_log_replay((wc), __func__, __LINE__, (error), fmt, ##args)
+
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
int inode_only,
struct btrfs_log_ctx *ctx);
-static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 objectid);
-static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_root *log,
- struct btrfs_path *path,
+static int link_to_fixup_dir(struct walk_control *wc, u64 objectid);
+static noinline int replay_dir_deletes(struct walk_control *wc,
u64 dirid, bool del_all);
static void wait_log_commit(struct btrfs_root *root, int transid);
@@ -300,53 +418,13 @@ void btrfs_end_log_trans(struct btrfs_root *root)
}
/*
- * the walk control struct is used to pass state down the chain when
- * processing the log tree. The stage field tells us which part
- * of the log tree processing we are currently doing. The others
- * are state fields used for that specific part
- */
-struct walk_control {
- /* should we free the extent on disk when done? This is used
- * at transaction commit time while freeing a log tree
- */
- int free;
-
- /* pin only walk, we record which extents on disk belong to the
- * log trees
- */
- int pin;
-
- /* what stage of the replay code we're currently in */
- int stage;
-
- /*
- * Ignore any items from the inode currently being processed. Needs
- * to be set every time we find a BTRFS_INODE_ITEM_KEY.
- */
- bool ignore_cur_inode;
-
- /* the root we are currently replaying */
- struct btrfs_root *replay_dest;
-
- /* the trans handle for the current replay */
- struct btrfs_trans_handle *trans;
-
- /* the function that gets used to process blocks we find in the
- * tree. Note the extent_buffer might not be up to date when it is
- * passed in, and it must be checked or read if you need the data
- * inside it
- */
- int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
- struct walk_control *wc, u64 gen, int level);
-};
-
-/*
* process_func used to pin down extents, write them or wait on them
*/
-static int process_one_buffer(struct btrfs_root *log,
- struct extent_buffer *eb,
+static int process_one_buffer(struct extent_buffer *eb,
struct walk_control *wc, u64 gen, int level)
{
+ struct btrfs_root *log = wc->log;
+ struct btrfs_trans_handle *trans = wc->trans;
struct btrfs_fs_info *fs_info = log->fs_info;
int ret = 0;
@@ -361,25 +439,36 @@ static int process_one_buffer(struct btrfs_root *log,
};
ret = btrfs_read_extent_buffer(eb, &check);
- if (ret)
+ if (unlikely(ret)) {
+ if (trans)
+ btrfs_abort_transaction(trans, ret);
+ else
+ btrfs_handle_fs_error(fs_info, ret, NULL);
return ret;
+ }
}
if (wc->pin) {
- ret = btrfs_pin_extent_for_log_replay(wc->trans, eb);
- if (ret)
+ ASSERT(trans != NULL);
+ ret = btrfs_pin_extent_for_log_replay(trans, eb);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
return ret;
+ }
- if (btrfs_buffer_uptodate(eb, gen, 0) &&
- btrfs_header_level(eb) == 0)
+ if (btrfs_buffer_uptodate(eb, gen, false) && level == 0) {
ret = btrfs_exclude_logged_extents(eb);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+ }
}
return ret;
}
/*
- * Item overwrite used by log replay. The given eb, slot and key all refer to
- * the source data we are copying out.
+ * Item overwrite used by log replay. The given log tree leaf, slot and key
+ * from the walk_control structure all refer to the source data we are copying
+ * out.
*
* The given root is for the tree we are copying into, and path is a scratch
* path for use in this function (it should be released on entry and will be
@@ -391,12 +480,10 @@ static int process_one_buffer(struct btrfs_root *log,
*
* If the key isn't in the destination yet, a new item is inserted.
*/
-static int overwrite_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
+static int overwrite_item(struct walk_control *wc)
{
+ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_root *root = wc->root;
int ret;
u32 item_size;
u64 saved_i_size = 0;
@@ -405,7 +492,7 @@ static int overwrite_item(struct btrfs_trans_handle *trans,
unsigned long dst_ptr;
struct extent_buffer *dst_eb;
int dst_slot;
- bool inode_item = key->type == BTRFS_INODE_ITEM_KEY;
+ const bool is_inode_item = (wc->log_key.type == BTRFS_INODE_ITEM_KEY);
/*
* This is only used during log replay, so the root is always from a
@@ -416,16 +503,21 @@ static int overwrite_item(struct btrfs_trans_handle *trans,
*/
ASSERT(btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID);
- item_size = btrfs_item_size(eb, slot);
- src_ptr = btrfs_item_ptr_offset(eb, slot);
+ item_size = btrfs_item_size(wc->log_leaf, wc->log_slot);
+ src_ptr = btrfs_item_ptr_offset(wc->log_leaf, wc->log_slot);
/* Look for the key in the destination tree. */
- ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
- if (ret < 0)
+ ret = btrfs_search_slot(NULL, root, &wc->log_key, wc->subvol_path, 0, 0);
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to search subvolume tree for key (%llu %u %llu) root %llu",
+ wc->log_key.objectid, wc->log_key.type,
+ wc->log_key.offset, btrfs_root_id(root));
return ret;
+ }
- dst_eb = path->nodes[0];
- dst_slot = path->slots[0];
+ dst_eb = wc->subvol_path->nodes[0];
+ dst_slot = wc->subvol_path->slots[0];
if (ret == 0) {
char *src_copy;
@@ -435,16 +527,17 @@ static int overwrite_item(struct btrfs_trans_handle *trans,
goto insert;
if (item_size == 0) {
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
return 0;
}
src_copy = kmalloc(item_size, GFP_NOFS);
if (!src_copy) {
- btrfs_release_path(path);
+ btrfs_abort_log_replay(wc, -ENOMEM,
+ "failed to allocate memory for log leaf item");
return -ENOMEM;
}
- read_extent_buffer(eb, src_copy, src_ptr, item_size);
+ read_extent_buffer(wc->log_leaf, src_copy, src_ptr, item_size);
dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot);
ret = memcmp_extent_buffer(dst_eb, src_copy, dst_ptr, item_size);
@@ -456,7 +549,7 @@ static int overwrite_item(struct btrfs_trans_handle *trans,
* sync
*/
if (ret == 0) {
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
return 0;
}
@@ -464,7 +557,7 @@ static int overwrite_item(struct btrfs_trans_handle *trans,
* We need to load the old nbytes into the inode so when we
* replay the extents we've logged we get the right nbytes.
*/
- if (inode_item) {
+ if (is_inode_item) {
struct btrfs_inode_item *item;
u64 nbytes;
u32 mode;
@@ -472,20 +565,20 @@ static int overwrite_item(struct btrfs_trans_handle *trans,
item = btrfs_item_ptr(dst_eb, dst_slot,
struct btrfs_inode_item);
nbytes = btrfs_inode_nbytes(dst_eb, item);
- item = btrfs_item_ptr(eb, slot,
+ item = btrfs_item_ptr(wc->log_leaf, wc->log_slot,
struct btrfs_inode_item);
- btrfs_set_inode_nbytes(eb, item, nbytes);
+ btrfs_set_inode_nbytes(wc->log_leaf, item, nbytes);
/*
* If this is a directory we need to reset the i_size to
* 0 so that we can set it up properly when replaying
* the rest of the items in this log.
*/
- mode = btrfs_inode_mode(eb, item);
+ mode = btrfs_inode_mode(wc->log_leaf, item);
if (S_ISDIR(mode))
- btrfs_set_inode_size(eb, item, 0);
+ btrfs_set_inode_size(wc->log_leaf, item, 0);
}
- } else if (inode_item) {
+ } else if (is_inode_item) {
struct btrfs_inode_item *item;
u32 mode;
@@ -493,38 +586,41 @@ static int overwrite_item(struct btrfs_trans_handle *trans,
* New inode, set nbytes to 0 so that the nbytes comes out
* properly when we replay the extents.
*/
- item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
- btrfs_set_inode_nbytes(eb, item, 0);
+ item = btrfs_item_ptr(wc->log_leaf, wc->log_slot, struct btrfs_inode_item);
+ btrfs_set_inode_nbytes(wc->log_leaf, item, 0);
/*
* If this is a directory we need to reset the i_size to 0 so
* that we can set it up properly when replaying the rest of
* the items in this log.
*/
- mode = btrfs_inode_mode(eb, item);
+ mode = btrfs_inode_mode(wc->log_leaf, item);
if (S_ISDIR(mode))
- btrfs_set_inode_size(eb, item, 0);
+ btrfs_set_inode_size(wc->log_leaf, item, 0);
}
insert:
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
/* try to insert the key into the destination tree */
- path->skip_release_on_error = 1;
- ret = btrfs_insert_empty_item(trans, root, path,
- key, item_size);
- path->skip_release_on_error = 0;
+ wc->subvol_path->skip_release_on_error = 1;
+ ret = btrfs_insert_empty_item(trans, root, wc->subvol_path, &wc->log_key, item_size);
+ wc->subvol_path->skip_release_on_error = 0;
- dst_eb = path->nodes[0];
- dst_slot = path->slots[0];
+ dst_eb = wc->subvol_path->nodes[0];
+ dst_slot = wc->subvol_path->slots[0];
/* make sure any existing item is the correct size */
if (ret == -EEXIST || ret == -EOVERFLOW) {
const u32 found_size = btrfs_item_size(dst_eb, dst_slot);
if (found_size > item_size)
- btrfs_truncate_item(trans, path, item_size, 1);
+ btrfs_truncate_item(trans, wc->subvol_path, item_size, 1);
else if (found_size < item_size)
- btrfs_extend_item(trans, path, item_size - found_size);
+ btrfs_extend_item(trans, wc->subvol_path, item_size - found_size);
} else if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to insert item for key (%llu %u %llu)",
+ wc->log_key.objectid, wc->log_key.type,
+ wc->log_key.offset);
return ret;
}
dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot);
@@ -538,15 +634,15 @@ insert:
* state of the tree found in the subvolume, and i_size is modified
* as it goes
*/
- if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) {
+ if (is_inode_item && ret == -EEXIST) {
struct btrfs_inode_item *src_item;
struct btrfs_inode_item *dst_item;
src_item = (struct btrfs_inode_item *)src_ptr;
dst_item = (struct btrfs_inode_item *)dst_ptr;
- if (btrfs_inode_generation(eb, src_item) == 0) {
- const u64 ino_size = btrfs_inode_size(eb, src_item);
+ if (btrfs_inode_generation(wc->log_leaf, src_item) == 0) {
+ const u64 ino_size = btrfs_inode_size(wc->log_leaf, src_item);
/*
* For regular files an ino_size == 0 is used only when
@@ -555,21 +651,21 @@ insert:
* case don't set the size of the inode in the fs/subvol
* tree, otherwise we would be throwing valid data away.
*/
- if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
+ if (S_ISREG(btrfs_inode_mode(wc->log_leaf, src_item)) &&
S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
ino_size != 0)
btrfs_set_inode_size(dst_eb, dst_item, ino_size);
goto no_copy;
}
- if (S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
+ if (S_ISDIR(btrfs_inode_mode(wc->log_leaf, src_item)) &&
S_ISDIR(btrfs_inode_mode(dst_eb, dst_item))) {
save_old_i_size = 1;
saved_i_size = btrfs_inode_size(dst_eb, dst_item);
}
}
- copy_extent_buffer(dst_eb, eb, dst_ptr, src_ptr, item_size);
+ copy_extent_buffer(dst_eb, wc->log_leaf, dst_ptr, src_ptr, item_size);
if (save_old_i_size) {
struct btrfs_inode_item *dst_item;
@@ -579,7 +675,7 @@ insert:
}
/* make sure the generation is filled in */
- if (key->type == BTRFS_INODE_ITEM_KEY) {
+ if (is_inode_item) {
struct btrfs_inode_item *dst_item;
dst_item = (struct btrfs_inode_item *)dst_ptr;
@@ -587,7 +683,7 @@ insert:
btrfs_set_inode_generation(dst_eb, dst_item, trans->transid);
}
no_copy:
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
return 0;
}
@@ -618,292 +714,354 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
* The extent is inserted into the file, dropping any existing extents
* from the file that overlap the new one.
*/
-static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
+static noinline int replay_one_extent(struct walk_control *wc)
{
+ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_root *root = wc->root;
struct btrfs_drop_extents_args drop_args = { 0 };
struct btrfs_fs_info *fs_info = root->fs_info;
int found_type;
u64 extent_end;
- u64 start = key->offset;
+ const u64 start = wc->log_key.offset;
u64 nbytes = 0;
+ u64 csum_start;
+ u64 csum_end;
+ LIST_HEAD(ordered_sums);
+ u64 offset;
+ unsigned long dest_offset;
+ struct btrfs_key ins;
struct btrfs_file_extent_item *item;
struct btrfs_inode *inode = NULL;
- unsigned long size;
int ret = 0;
- item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
- found_type = btrfs_file_extent_type(eb, item);
+ item = btrfs_item_ptr(wc->log_leaf, wc->log_slot, struct btrfs_file_extent_item);
+ found_type = btrfs_file_extent_type(wc->log_leaf, item);
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
- nbytes = btrfs_file_extent_num_bytes(eb, item);
- extent_end = start + nbytes;
-
- /*
- * We don't add to the inodes nbytes if we are prealloc or a
- * hole.
- */
- if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
- nbytes = 0;
+ extent_end = start + btrfs_file_extent_num_bytes(wc->log_leaf, item);
+ /* Holes don't take up space. */
+ if (btrfs_file_extent_disk_bytenr(wc->log_leaf, item) != 0)
+ nbytes = btrfs_file_extent_num_bytes(wc->log_leaf, item);
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- size = btrfs_file_extent_ram_bytes(eb, item);
- nbytes = btrfs_file_extent_ram_bytes(eb, item);
- extent_end = ALIGN(start + size,
- fs_info->sectorsize);
+ nbytes = btrfs_file_extent_ram_bytes(wc->log_leaf, item);
+ extent_end = ALIGN(start + nbytes, fs_info->sectorsize);
} else {
- btrfs_err(fs_info,
- "unexpected extent type=%d root=%llu inode=%llu offset=%llu",
- found_type, btrfs_root_id(root), key->objectid, key->offset);
+ btrfs_abort_log_replay(wc, -EUCLEAN,
+ "unexpected extent type=%d root=%llu inode=%llu offset=%llu",
+ found_type, btrfs_root_id(root),
+ wc->log_key.objectid, wc->log_key.offset);
return -EUCLEAN;
}
- inode = btrfs_iget_logging(key->objectid, root);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
+ inode = btrfs_iget_logging(wc->log_key.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to get inode %llu for root %llu",
+ wc->log_key.objectid, btrfs_root_id(root));
+ return ret;
+ }
/*
* first check to see if we already have this extent in the
* file. This must be done before the btrfs_drop_extents run
* so we don't try to drop this extent.
*/
- ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), start, 0);
+ ret = btrfs_lookup_file_extent(trans, root, wc->subvol_path,
+ btrfs_ino(inode), start, 0);
if (ret == 0 &&
(found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC)) {
+ struct extent_buffer *leaf = wc->subvol_path->nodes[0];
struct btrfs_file_extent_item existing;
unsigned long ptr;
- ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
- read_extent_buffer(path->nodes[0], &existing, ptr, sizeof(existing));
+ ptr = btrfs_item_ptr_offset(leaf, wc->subvol_path->slots[0]);
+ read_extent_buffer(leaf, &existing, ptr, sizeof(existing));
/*
* we already have a pointer to this exact extent,
* we don't have to do anything
*/
- if (memcmp_extent_buffer(eb, &existing, (unsigned long)item,
+ if (memcmp_extent_buffer(wc->log_leaf, &existing, (unsigned long)item,
sizeof(existing)) == 0) {
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
goto out;
}
}
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
/* drop any overlapping extents */
drop_args.start = start;
drop_args.end = extent_end;
drop_args.drop_cache = true;
+ drop_args.path = wc->subvol_path;
ret = btrfs_drop_extents(trans, root, inode, &drop_args);
- if (ret)
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to drop extents for inode %llu range [%llu, %llu) root %llu",
+ wc->log_key.objectid, start, extent_end,
+ btrfs_root_id(root));
goto out;
+ }
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
- u64 offset;
- unsigned long dest_offset;
- struct btrfs_key ins;
-
- if (btrfs_file_extent_disk_bytenr(eb, item) == 0 &&
- btrfs_fs_incompat(fs_info, NO_HOLES))
- goto update_inode;
-
- ret = btrfs_insert_empty_item(trans, root, path, key,
- sizeof(*item));
+ if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+ /* inline extents are easy, we just overwrite them */
+ ret = overwrite_item(wc);
if (ret)
goto out;
- dest_offset = btrfs_item_ptr_offset(path->nodes[0],
- path->slots[0]);
- copy_extent_buffer(path->nodes[0], eb, dest_offset,
- (unsigned long)item, sizeof(*item));
+ goto update_inode;
+ }
- ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
- ins.type = BTRFS_EXTENT_ITEM_KEY;
- ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
- offset = key->offset - btrfs_file_extent_offset(eb, item);
+ /*
+ * If not an inline extent, it can only be a regular or prealloc one.
+ * We have checked that above and returned -EUCLEAN if not.
+ */
- /*
- * Manually record dirty extent, as here we did a shallow
- * file extent item copy and skip normal backref update,
- * but modifying extent tree all by ourselves.
- * So need to manually record dirty extent for qgroup,
- * as the owner of the file extent changed from log tree
- * (doesn't affect qgroup) to fs/file tree(affects qgroup)
- */
- ret = btrfs_qgroup_trace_extent(trans,
- btrfs_file_extent_disk_bytenr(eb, item),
- btrfs_file_extent_disk_num_bytes(eb, item));
- if (ret < 0)
- goto out;
+ /* A hole and NO_HOLES feature enabled, nothing else to do. */
+ if (btrfs_file_extent_disk_bytenr(wc->log_leaf, item) == 0 &&
+ btrfs_fs_incompat(fs_info, NO_HOLES))
+ goto update_inode;
- if (ins.objectid > 0) {
- u64 csum_start;
- u64 csum_end;
- LIST_HEAD(ordered_sums);
+ ret = btrfs_insert_empty_item(trans, root, wc->subvol_path,
+ &wc->log_key, sizeof(*item));
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to insert item with key (%llu %u %llu) root %llu",
+ wc->log_key.objectid, wc->log_key.type,
+ wc->log_key.offset, btrfs_root_id(root));
+ goto out;
+ }
+ dest_offset = btrfs_item_ptr_offset(wc->subvol_path->nodes[0],
+ wc->subvol_path->slots[0]);
+ copy_extent_buffer(wc->subvol_path->nodes[0], wc->log_leaf, dest_offset,
+ (unsigned long)item, sizeof(*item));
- /*
- * is this extent already allocated in the extent
- * allocation tree? If so, just add a reference
- */
- ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
- ins.offset);
- if (ret < 0) {
- goto out;
- } else if (ret == 0) {
- struct btrfs_ref ref = {
- .action = BTRFS_ADD_DELAYED_REF,
- .bytenr = ins.objectid,
- .num_bytes = ins.offset,
- .owning_root = btrfs_root_id(root),
- .ref_root = btrfs_root_id(root),
- };
- btrfs_init_data_ref(&ref, key->objectid, offset,
- 0, false);
- ret = btrfs_inc_extent_ref(trans, &ref);
- if (ret)
- goto out;
- } else {
- /*
- * insert the extent pointer in the extent
- * allocation tree
- */
- ret = btrfs_alloc_logged_file_extent(trans,
- btrfs_root_id(root),
- key->objectid, offset, &ins);
- if (ret)
- goto out;
- }
- btrfs_release_path(path);
+ /*
+ * We have an explicit hole and NO_HOLES is not enabled. We have added
+ * the hole file extent item to the subvolume tree, so we don't have
+ * anything else to do other than update the file extent item range and
+ * update the inode item.
+ */
+ if (btrfs_file_extent_disk_bytenr(wc->log_leaf, item) == 0) {
+ btrfs_release_path(wc->subvol_path);
+ goto update_inode;
+ }
- if (btrfs_file_extent_compression(eb, item)) {
- csum_start = ins.objectid;
- csum_end = csum_start + ins.offset;
- } else {
- csum_start = ins.objectid +
- btrfs_file_extent_offset(eb, item);
- csum_end = csum_start +
- btrfs_file_extent_num_bytes(eb, item);
- }
+ ins.objectid = btrfs_file_extent_disk_bytenr(wc->log_leaf, item);
+ ins.type = BTRFS_EXTENT_ITEM_KEY;
+ ins.offset = btrfs_file_extent_disk_num_bytes(wc->log_leaf, item);
+ offset = wc->log_key.offset - btrfs_file_extent_offset(wc->log_leaf, item);
- ret = btrfs_lookup_csums_list(root->log_root,
- csum_start, csum_end - 1,
- &ordered_sums, false);
- if (ret < 0)
- goto out;
- ret = 0;
- /*
- * Now delete all existing cums in the csum root that
- * cover our range. We do this because we can have an
- * extent that is completely referenced by one file
- * extent item and partially referenced by another
- * file extent item (like after using the clone or
- * extent_same ioctls). In this case if we end up doing
- * the replay of the one that partially references the
- * extent first, and we do not do the csum deletion
- * below, we can get 2 csum items in the csum tree that
- * overlap each other. For example, imagine our log has
- * the two following file extent items:
- *
- * key (257 EXTENT_DATA 409600)
- * extent data disk byte 12845056 nr 102400
- * extent data offset 20480 nr 20480 ram 102400
- *
- * key (257 EXTENT_DATA 819200)
- * extent data disk byte 12845056 nr 102400
- * extent data offset 0 nr 102400 ram 102400
- *
- * Where the second one fully references the 100K extent
- * that starts at disk byte 12845056, and the log tree
- * has a single csum item that covers the entire range
- * of the extent:
- *
- * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
- *
- * After the first file extent item is replayed, the
- * csum tree gets the following csum item:
- *
- * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
- *
- * Which covers the 20K sub-range starting at offset 20K
- * of our extent. Now when we replay the second file
- * extent item, if we do not delete existing csum items
- * that cover any of its blocks, we end up getting two
- * csum items in our csum tree that overlap each other:
- *
- * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
- * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
- *
- * Which is a problem, because after this anyone trying
- * to lookup up for the checksum of any block of our
- * extent starting at an offset of 40K or higher, will
- * end up looking at the second csum item only, which
- * does not contain the checksum for any block starting
- * at offset 40K or higher of our extent.
- */
- while (!list_empty(&ordered_sums)) {
- struct btrfs_ordered_sum *sums;
- struct btrfs_root *csum_root;
-
- sums = list_first_entry(&ordered_sums,
- struct btrfs_ordered_sum,
- list);
- csum_root = btrfs_csum_root(fs_info,
- sums->logical);
- if (!ret)
- ret = btrfs_del_csums(trans, csum_root,
- sums->logical,
- sums->len);
- if (!ret)
- ret = btrfs_csum_file_blocks(trans,
- csum_root,
- sums);
- list_del(&sums->list);
- kfree(sums);
- }
- if (ret)
- goto out;
- } else {
- btrfs_release_path(path);
+ /*
+ * Manually record the dirty extent: we did a shallow copy of the file
+ * extent item and skipped the normal backref update, modifying the
+ * extent tree ourselves. So we need to manually record the dirty extent
+ * for qgroup, as the owner of the file extent changed from the log tree
+ * (doesn't affect qgroup) to the fs/file tree (affects qgroup).
+ */
+ ret = btrfs_qgroup_trace_extent(trans, ins.objectid, ins.offset);
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+"failed to trace extent for bytenr %llu disk_num_bytes %llu inode %llu root %llu",
+ ins.objectid, ins.offset,
+ wc->log_key.objectid, btrfs_root_id(root));
+ goto out;
+ }
+
+ /*
+ * Is this extent already allocated in the extent tree?
+ * If so, just add a reference.
+ */
+ ret = btrfs_lookup_data_extent(fs_info, ins.objectid, ins.offset);
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+"failed to lookup data extent for bytenr %llu disk_num_bytes %llu inode %llu root %llu",
+ ins.objectid, ins.offset,
+ wc->log_key.objectid, btrfs_root_id(root));
+ goto out;
+ } else if (ret == 0) {
+ struct btrfs_ref ref = {
+ .action = BTRFS_ADD_DELAYED_REF,
+ .bytenr = ins.objectid,
+ .num_bytes = ins.offset,
+ .owning_root = btrfs_root_id(root),
+ .ref_root = btrfs_root_id(root),
+ };
+
+ btrfs_init_data_ref(&ref, wc->log_key.objectid, offset, 0, false);
+ ret = btrfs_inc_extent_ref(trans, &ref);
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+"failed to increment data extent for bytenr %llu disk_num_bytes %llu inode %llu root %llu",
+ ins.objectid, ins.offset,
+ wc->log_key.objectid,
+ btrfs_root_id(root));
+ goto out;
}
- } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- /* inline extents are easy, we just overwrite them */
- ret = overwrite_item(trans, root, path, eb, slot, key);
- if (ret)
+ } else {
+ /* Insert the extent pointer in the extent tree. */
+ ret = btrfs_alloc_logged_file_extent(trans, btrfs_root_id(root),
+ wc->log_key.objectid, offset, &ins);
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+"failed to allocate logged data extent for bytenr %llu disk_num_bytes %llu offset %llu inode %llu root %llu",
+ ins.objectid, ins.offset, offset,
+ wc->log_key.objectid, btrfs_root_id(root));
goto out;
+ }
}
- ret = btrfs_inode_set_file_extent_range(inode, start, extent_end - start);
+ btrfs_release_path(wc->subvol_path);
+
+ if (btrfs_file_extent_compression(wc->log_leaf, item)) {
+ csum_start = ins.objectid;
+ csum_end = csum_start + ins.offset;
+ } else {
+ csum_start = ins.objectid + btrfs_file_extent_offset(wc->log_leaf, item);
+ csum_end = csum_start + btrfs_file_extent_num_bytes(wc->log_leaf, item);
+ }
+
+ ret = btrfs_lookup_csums_list(root->log_root, csum_start, csum_end - 1,
+ &ordered_sums, false);
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookups csums for range [%llu, %llu) inode %llu root %llu",
+ csum_start, csum_end, wc->log_key.objectid,
+ btrfs_root_id(root));
+ goto out;
+ }
+ ret = 0;
+ /*
+ * Now delete all existing csums in the csum root that cover our range.
+ * We do this because we can have an extent that is completely
+ * referenced by one file extent item and partially referenced by
+ * another file extent item (like after using the clone or extent_same
+ * ioctls). In this case if we end up doing the replay of the one that
+ * partially references the extent first, and we do not do the csum
+ * deletion below, we can get 2 csum items in the csum tree that overlap
+ * each other. For example, imagine our log has the two following file
+ * extent items:
+ *
+ * key (257 EXTENT_DATA 409600)
+ * extent data disk byte 12845056 nr 102400
+ * extent data offset 20480 nr 20480 ram 102400
+ *
+ * key (257 EXTENT_DATA 819200)
+ * extent data disk byte 12845056 nr 102400
+ * extent data offset 0 nr 102400 ram 102400
+ *
+ * Where the second one fully references the 100K extent that starts at
+ * disk byte 12845056, and the log tree has a single csum item that
+ * covers the entire range of the extent:
+ *
+ * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
+ *
+ * After the first file extent item is replayed, the csum tree gets the
+ * following csum item:
+ *
+ * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
+ *
+ * Which covers the 20K sub-range starting at offset 20K of our extent.
+ * Now when we replay the second file extent item, if we do not delete
+ * existing csum items that cover any of its blocks, we end up getting
+ * two csum items in our csum tree that overlap each other:
+ *
+ * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
+ * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
+ *
+ * Which is a problem, because after this anyone trying to look up
+ * the checksum of any block of our extent starting at an offset of 40K
+ * or higher, will end up looking at the second csum item only, which
+ * does not contain the checksum for any block starting at offset 40K or
+ * higher of our extent.
+ */
+ while (!list_empty(&ordered_sums)) {
+ struct btrfs_ordered_sum *sums;
+ struct btrfs_root *csum_root;
+
+ sums = list_first_entry(&ordered_sums, struct btrfs_ordered_sum, list);
+ csum_root = btrfs_csum_root(fs_info, sums->logical);
+ if (!ret) {
+ ret = btrfs_del_csums(trans, csum_root, sums->logical,
+ sums->len);
+ if (ret)
+ btrfs_abort_log_replay(wc, ret,
+ "failed to delete csums for range [%llu, %llu) inode %llu root %llu",
+ sums->logical,
+ sums->logical + sums->len,
+ wc->log_key.objectid,
+ btrfs_root_id(root));
+ }
+ if (!ret) {
+ ret = btrfs_csum_file_blocks(trans, csum_root, sums);
+ if (ret)
+ btrfs_abort_log_replay(wc, ret,
+ "failed to add csums for range [%llu, %llu) inode %llu root %llu",
+ sums->logical,
+ sums->logical + sums->len,
+ wc->log_key.objectid,
+ btrfs_root_id(root));
+ }
+ list_del(&sums->list);
+ kfree(sums);
+ }
if (ret)
goto out;
update_inode:
+ ret = btrfs_inode_set_file_extent_range(inode, start, extent_end - start);
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to set file extent range [%llu, %llu) inode %llu root %llu",
+ start, extent_end, wc->log_key.objectid,
+ btrfs_root_id(root));
+ goto out;
+ }
+
btrfs_update_inode_bytes(inode, nbytes, drop_args.bytes_found);
ret = btrfs_update_inode(trans, inode);
+ if (ret)
+ btrfs_abort_log_replay(wc, ret,
+ "failed to update inode %llu root %llu",
+ wc->log_key.objectid, btrfs_root_id(root));
out:
iput(&inode->vfs_inode);
return ret;
}
-static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans,
+static int unlink_inode_for_log_replay(struct walk_control *wc,
struct btrfs_inode *dir,
struct btrfs_inode *inode,
const struct fscrypt_str *name)
{
+ struct btrfs_trans_handle *trans = wc->trans;
int ret;
ret = btrfs_unlink_inode(trans, dir, inode, name);
- if (ret)
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to unlink inode %llu parent dir %llu name %.*s root %llu",
+ btrfs_ino(inode), btrfs_ino(dir), name->len,
+ name->name, btrfs_root_id(inode->root));
return ret;
+ }
/*
* Whenever we need to check if a name exists or not, we check the
* fs/subvolume tree. So after an unlink we must run delayed items, so
* that future checks for a name during log replay see that the name
* does not exists anymore.
*/
- return btrfs_run_delayed_items(trans);
+ ret = btrfs_run_delayed_items(trans);
+ if (ret)
+ btrfs_abort_log_replay(wc, ret,
+"failed to run delayed items current inode %llu parent dir %llu name %.*s root %llu",
+ btrfs_ino(inode), btrfs_ino(dir), name->len,
+ name->name, btrfs_root_id(inode->root));
+
+ return ret;
}
/*
@@ -914,39 +1072,44 @@ static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans,
* This is a helper function to do the unlink of a specific directory
* item
*/
-static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
+static noinline int drop_one_dir_item(struct walk_control *wc,
struct btrfs_inode *dir,
struct btrfs_dir_item *di)
{
struct btrfs_root *root = dir->root;
struct btrfs_inode *inode;
struct fscrypt_str name;
- struct extent_buffer *leaf;
+ struct extent_buffer *leaf = wc->subvol_path->nodes[0];
struct btrfs_key location;
int ret;
- leaf = path->nodes[0];
-
btrfs_dir_item_key_to_cpu(leaf, di, &location);
ret = read_alloc_one_name(leaf, di + 1, btrfs_dir_name_len(leaf, di), &name);
- if (ret)
- return -ENOMEM;
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to allocate name for dir %llu root %llu",
+ btrfs_ino(dir), btrfs_root_id(root));
+ return ret;
+ }
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
inode = btrfs_iget_logging(location.objectid, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to open inode %llu parent dir %llu name %.*s root %llu",
+ location.objectid, btrfs_ino(dir),
+ name.len, name.name, btrfs_root_id(root));
inode = NULL;
goto out;
}
- ret = link_to_fixup_dir(trans, root, path, location.objectid);
+ ret = link_to_fixup_dir(wc, location.objectid);
if (ret)
goto out;
- ret = unlink_inode_for_log_replay(trans, dir, inode, &name);
+ ret = unlink_inode_for_log_replay(wc, dir, inode, &name);
out:
kfree(name.name);
if (inode)
@@ -1013,7 +1176,7 @@ static noinline int backref_in_log(struct btrfs_root *log,
u64 ref_objectid,
const struct fscrypt_str *name)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int ret;
path = btrfs_alloc_path();
@@ -1021,12 +1184,10 @@ static noinline int backref_in_log(struct btrfs_root *log,
return -ENOMEM;
ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
- if (ret < 0) {
- goto out;
- } else if (ret == 1) {
- ret = 0;
- goto out;
- }
+ if (ret < 0)
+ return ret;
+ if (ret == 1)
+ return 0;
if (key->type == BTRFS_INODE_EXTREF_KEY)
ret = !!btrfs_find_name_in_ext_backref(path->nodes[0],
@@ -1035,20 +1196,15 @@ static noinline int backref_in_log(struct btrfs_root *log,
else
ret = !!btrfs_find_name_in_backref(path->nodes[0],
path->slots[0], name);
-out:
- btrfs_free_path(path);
return ret;
}
-static int unlink_refs_not_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct btrfs_root *log_root,
+static int unlink_refs_not_in_log(struct walk_control *wc,
struct btrfs_key *search_key,
struct btrfs_inode *dir,
- struct btrfs_inode *inode,
- u64 parent_objectid)
+ struct btrfs_inode *inode)
{
- struct extent_buffer *leaf = path->nodes[0];
+ struct extent_buffer *leaf = wc->subvol_path->nodes[0];
unsigned long ptr;
unsigned long ptr_end;
@@ -1057,8 +1213,8 @@ static int unlink_refs_not_in_log(struct btrfs_trans_handle *trans,
* log. If so, we allow them to stay otherwise they must be unlinked as
* a conflict.
*/
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
- ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]);
+ ptr = btrfs_item_ptr_offset(leaf, wc->subvol_path->slots[0]);
+ ptr_end = ptr + btrfs_item_size(leaf, wc->subvol_path->slots[0]);
while (ptr < ptr_end) {
struct fscrypt_str victim_name;
struct btrfs_inode_ref *victim_ref;
@@ -1068,22 +1224,34 @@ static int unlink_refs_not_in_log(struct btrfs_trans_handle *trans,
ret = read_alloc_one_name(leaf, (victim_ref + 1),
btrfs_inode_ref_name_len(leaf, victim_ref),
&victim_name);
- if (ret)
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to allocate name for inode %llu parent dir %llu root %llu",
+ btrfs_ino(inode), btrfs_ino(dir),
+ btrfs_root_id(inode->root));
return ret;
+ }
- ret = backref_in_log(log_root, search_key, parent_objectid, &victim_name);
+ ret = backref_in_log(wc->log, search_key, btrfs_ino(dir), &victim_name);
if (ret) {
- kfree(victim_name.name);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+"failed to check if backref is in log tree for inode %llu parent dir %llu name %.*s root %llu",
+ btrfs_ino(inode), btrfs_ino(dir),
+ victim_name.len, victim_name.name,
+ btrfs_root_id(inode->root));
+ kfree(victim_name.name);
return ret;
+ }
+ kfree(victim_name.name);
ptr = (unsigned long)(victim_ref + 1) + victim_name.len;
continue;
}
inc_nlink(&inode->vfs_inode);
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
- ret = unlink_inode_for_log_replay(trans, dir, inode, &victim_name);
+ ret = unlink_inode_for_log_replay(wc, dir, inode, &victim_name);
kfree(victim_name.name);
if (ret)
return ret;
@@ -1093,64 +1261,64 @@ static int unlink_refs_not_in_log(struct btrfs_trans_handle *trans,
return 0;
}
-static int unlink_extrefs_not_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct btrfs_root *root,
- struct btrfs_root *log_root,
+static int unlink_extrefs_not_in_log(struct walk_control *wc,
struct btrfs_key *search_key,
- struct btrfs_inode *inode,
- u64 inode_objectid,
- u64 parent_objectid)
+ struct btrfs_inode *dir,
+ struct btrfs_inode *inode)
{
- struct extent_buffer *leaf = path->nodes[0];
- const unsigned long base = btrfs_item_ptr_offset(leaf, path->slots[0]);
- const u32 item_size = btrfs_item_size(leaf, path->slots[0]);
+ struct extent_buffer *leaf = wc->subvol_path->nodes[0];
+ const unsigned long base = btrfs_item_ptr_offset(leaf, wc->subvol_path->slots[0]);
+ const u32 item_size = btrfs_item_size(leaf, wc->subvol_path->slots[0]);
u32 cur_offset = 0;
while (cur_offset < item_size) {
+ struct btrfs_root *log_root = wc->log;
struct btrfs_inode_extref *extref;
- struct btrfs_inode *victim_parent;
struct fscrypt_str victim_name;
int ret;
extref = (struct btrfs_inode_extref *)(base + cur_offset);
victim_name.len = btrfs_inode_extref_name_len(leaf, extref);
- if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
+ if (btrfs_inode_extref_parent(leaf, extref) != btrfs_ino(dir))
goto next;
ret = read_alloc_one_name(leaf, &extref->name, victim_name.len,
&victim_name);
- if (ret)
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to allocate name for inode %llu parent dir %llu root %llu",
+ btrfs_ino(inode), btrfs_ino(dir),
+ btrfs_root_id(inode->root));
return ret;
+ }
- search_key->objectid = inode_objectid;
+ search_key->objectid = btrfs_ino(inode);
search_key->type = BTRFS_INODE_EXTREF_KEY;
- search_key->offset = btrfs_extref_hash(parent_objectid,
+ search_key->offset = btrfs_extref_hash(btrfs_ino(dir),
victim_name.name,
victim_name.len);
- ret = backref_in_log(log_root, search_key, parent_objectid, &victim_name);
+ ret = backref_in_log(log_root, search_key, btrfs_ino(dir), &victim_name);
if (ret) {
- kfree(victim_name.name);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+"failed to check if backref is in log tree for inode %llu parent dir %llu name %.*s root %llu",
+ btrfs_ino(inode), btrfs_ino(dir),
+ victim_name.len, victim_name.name,
+ btrfs_root_id(inode->root));
+ kfree(victim_name.name);
return ret;
+ }
+ kfree(victim_name.name);
next:
cur_offset += victim_name.len + sizeof(*extref);
continue;
}
- victim_parent = btrfs_iget_logging(parent_objectid, root);
- if (IS_ERR(victim_parent)) {
- kfree(victim_name.name);
- return PTR_ERR(victim_parent);
- }
-
inc_nlink(&inode->vfs_inode);
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
- ret = unlink_inode_for_log_replay(trans, victim_parent, inode,
- &victim_name);
- iput(&victim_parent->vfs_inode);
+ ret = unlink_inode_for_log_replay(wc, dir, inode, &victim_name);
kfree(victim_name.name);
if (ret)
return ret;
@@ -1160,27 +1328,29 @@ next:
return 0;
}
-static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_root *log_root,
+static inline int __add_inode_ref(struct walk_control *wc,
struct btrfs_inode *dir,
struct btrfs_inode *inode,
- u64 inode_objectid, u64 parent_objectid,
u64 ref_index, struct fscrypt_str *name)
{
int ret;
+ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_root *root = wc->root;
struct btrfs_dir_item *di;
struct btrfs_key search_key;
struct btrfs_inode_extref *extref;
again:
/* Search old style refs */
- search_key.objectid = inode_objectid;
+ search_key.objectid = btrfs_ino(inode);
search_key.type = BTRFS_INODE_REF_KEY;
- search_key.offset = parent_objectid;
- ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+ search_key.offset = btrfs_ino(dir);
+ ret = btrfs_search_slot(NULL, root, &search_key, wc->subvol_path, 0, 0);
if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to search subvolume tree for key (%llu %u %llu) root %llu",
+ search_key.objectid, search_key.type,
+ search_key.offset, btrfs_root_id(root));
return ret;
} else if (ret == 0) {
/*
@@ -1190,52 +1360,60 @@ again:
if (search_key.objectid == search_key.offset)
return 1;
- ret = unlink_refs_not_in_log(trans, path, log_root, &search_key,
- dir, inode, parent_objectid);
+ ret = unlink_refs_not_in_log(wc, &search_key, dir, inode);
if (ret == -EAGAIN)
goto again;
else if (ret)
return ret;
}
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
/* Same search but for extended refs */
- extref = btrfs_lookup_inode_extref(root, path, name, inode_objectid, parent_objectid);
+ extref = btrfs_lookup_inode_extref(root, wc->subvol_path, name,
+ btrfs_ino(inode), btrfs_ino(dir));
if (IS_ERR(extref)) {
return PTR_ERR(extref);
} else if (extref) {
- ret = unlink_extrefs_not_in_log(trans, path, root, log_root,
- &search_key, inode,
- inode_objectid, parent_objectid);
+ ret = unlink_extrefs_not_in_log(wc, &search_key, dir, inode);
if (ret == -EAGAIN)
goto again;
else if (ret)
return ret;
}
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
/* look for a conflicting sequence number */
- di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
+ di = btrfs_lookup_dir_index_item(trans, root, wc->subvol_path, btrfs_ino(dir),
ref_index, name, 0);
if (IS_ERR(di)) {
- return PTR_ERR(di);
+ ret = PTR_ERR(di);
+ btrfs_abort_log_replay(wc, ret,
+"failed to lookup dir index item for dir %llu ref_index %llu name %.*s root %llu",
+ btrfs_ino(dir), ref_index, name->len,
+ name->name, btrfs_root_id(root));
+ return ret;
} else if (di) {
- ret = drop_one_dir_item(trans, path, dir, di);
+ ret = drop_one_dir_item(wc, dir, di);
if (ret)
return ret;
}
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
/* look for a conflicting name */
- di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), name, 0);
+ di = btrfs_lookup_dir_item(trans, root, wc->subvol_path, btrfs_ino(dir), name, 0);
if (IS_ERR(di)) {
- return PTR_ERR(di);
+ ret = PTR_ERR(di);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup dir item for dir %llu name %.*s root %llu",
+ btrfs_ino(dir), name->len, name->name,
+ btrfs_root_id(root));
+ return ret;
} else if (di) {
- ret = drop_one_dir_item(trans, path, dir, di);
+ ret = drop_one_dir_item(wc, dir, di);
if (ret)
return ret;
}
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
return 0;
}
@@ -1288,63 +1466,79 @@ static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
* proper unlink of that name (that is, remove its entry from the inode
* reference item and both dir index keys).
*/
-static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_inode *inode,
- struct extent_buffer *log_eb,
- int log_slot,
- struct btrfs_key *key)
+static int unlink_old_inode_refs(struct walk_control *wc, struct btrfs_inode *inode)
{
+ struct btrfs_root *root = wc->root;
int ret;
unsigned long ref_ptr;
unsigned long ref_end;
struct extent_buffer *eb;
again:
- btrfs_release_path(path);
- ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ btrfs_release_path(wc->subvol_path);
+ ret = btrfs_search_slot(NULL, root, &wc->log_key, wc->subvol_path, 0, 0);
if (ret > 0) {
ret = 0;
goto out;
}
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to search subvolume tree for key (%llu %u %llu) root %llu",
+ wc->log_key.objectid, wc->log_key.type,
+ wc->log_key.offset, btrfs_root_id(root));
goto out;
+ }
- eb = path->nodes[0];
- ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
- ref_end = ref_ptr + btrfs_item_size(eb, path->slots[0]);
+ eb = wc->subvol_path->nodes[0];
+ ref_ptr = btrfs_item_ptr_offset(eb, wc->subvol_path->slots[0]);
+ ref_end = ref_ptr + btrfs_item_size(eb, wc->subvol_path->slots[0]);
while (ref_ptr < ref_end) {
struct fscrypt_str name;
u64 parent_id;
- if (key->type == BTRFS_INODE_EXTREF_KEY) {
+ if (wc->log_key.type == BTRFS_INODE_EXTREF_KEY) {
ret = extref_get_fields(eb, ref_ptr, &name,
NULL, &parent_id);
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to get extref details for inode %llu root %llu",
+ btrfs_ino(inode),
+ btrfs_root_id(root));
+ goto out;
+ }
} else {
- parent_id = key->offset;
+ parent_id = wc->log_key.offset;
ret = ref_get_fields(eb, ref_ptr, &name, NULL);
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to get ref details for inode %llu parent_id %llu root %llu",
+ btrfs_ino(inode), parent_id,
+ btrfs_root_id(root));
+ goto out;
+ }
}
- if (ret)
- goto out;
- if (key->type == BTRFS_INODE_EXTREF_KEY)
- ret = !!btrfs_find_name_in_ext_backref(log_eb, log_slot,
+ if (wc->log_key.type == BTRFS_INODE_EXTREF_KEY)
+ ret = !!btrfs_find_name_in_ext_backref(wc->log_leaf, wc->log_slot,
parent_id, &name);
else
- ret = !!btrfs_find_name_in_backref(log_eb, log_slot, &name);
+ ret = !!btrfs_find_name_in_backref(wc->log_leaf, wc->log_slot,
+ &name);
if (!ret) {
struct btrfs_inode *dir;
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
dir = btrfs_iget_logging(parent_id, root);
if (IS_ERR(dir)) {
ret = PTR_ERR(dir);
kfree(name.name);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup dir inode %llu root %llu",
+ parent_id, btrfs_root_id(root));
goto out;
}
- ret = unlink_inode_for_log_replay(trans, dir, inode, &name);
+ ret = unlink_inode_for_log_replay(wc, dir, inode, &name);
kfree(name.name);
iput(&dir->vfs_inode);
if (ret)
@@ -1354,56 +1548,51 @@ again:
kfree(name.name);
ref_ptr += name.len;
- if (key->type == BTRFS_INODE_EXTREF_KEY)
+ if (wc->log_key.type == BTRFS_INODE_EXTREF_KEY)
ref_ptr += sizeof(struct btrfs_inode_extref);
else
ref_ptr += sizeof(struct btrfs_inode_ref);
}
ret = 0;
out:
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
return ret;
}
/*
- * replay one inode back reference item found in the log tree.
- * eb, slot and key refer to the buffer and key found in the log tree.
- * root is the destination we are replaying into, and path is for temp
- * use by this function. (it should be released on return).
+ * Replay one inode back reference item found in the log tree.
+ * Path is for temporary use by this function (it should be released on return).
*/
-static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_root *log,
- struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
+static noinline int add_inode_ref(struct walk_control *wc)
{
+ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_root *root = wc->root;
struct btrfs_inode *dir = NULL;
struct btrfs_inode *inode = NULL;
unsigned long ref_ptr;
unsigned long ref_end;
struct fscrypt_str name = { 0 };
int ret;
- const bool is_extref_item = (key->type == BTRFS_INODE_EXTREF_KEY);
+ const bool is_extref_item = (wc->log_key.type == BTRFS_INODE_EXTREF_KEY);
u64 parent_objectid;
u64 inode_objectid;
u64 ref_index = 0;
int ref_struct_size;
- ref_ptr = btrfs_item_ptr_offset(eb, slot);
- ref_end = ref_ptr + btrfs_item_size(eb, slot);
+ ref_ptr = btrfs_item_ptr_offset(wc->log_leaf, wc->log_slot);
+ ref_end = ref_ptr + btrfs_item_size(wc->log_leaf, wc->log_slot);
if (is_extref_item) {
struct btrfs_inode_extref *r;
ref_struct_size = sizeof(struct btrfs_inode_extref);
r = (struct btrfs_inode_extref *)ref_ptr;
- parent_objectid = btrfs_inode_extref_parent(eb, r);
+ parent_objectid = btrfs_inode_extref_parent(wc->log_leaf, r);
} else {
ref_struct_size = sizeof(struct btrfs_inode_ref);
- parent_objectid = key->offset;
+ parent_objectid = wc->log_key.offset;
}
- inode_objectid = key->objectid;
+ inode_objectid = wc->log_key.objectid;
/*
* it is possible that we didn't log all the parent directories
@@ -1416,6 +1605,10 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ret = PTR_ERR(dir);
if (ret == -ENOENT)
ret = 0;
+ else
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup dir inode %llu root %llu",
+ parent_objectid, btrfs_root_id(root));
dir = NULL;
goto out;
}
@@ -1423,16 +1616,24 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
inode = btrfs_iget_logging(inode_objectid, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup inode %llu root %llu",
+ inode_objectid, btrfs_root_id(root));
inode = NULL;
goto out;
}
while (ref_ptr < ref_end) {
if (is_extref_item) {
- ret = extref_get_fields(eb, ref_ptr, &name,
+ ret = extref_get_fields(wc->log_leaf, ref_ptr, &name,
&ref_index, &parent_objectid);
- if (ret)
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to get extref details for inode %llu root %llu",
+ btrfs_ino(inode),
+ btrfs_root_id(root));
goto out;
+ }
/*
* parent object can change from one array
* item to another.
@@ -1457,19 +1658,35 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
*/
ret = 0;
goto next;
+ } else {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup dir inode %llu root %llu",
+ parent_objectid,
+ btrfs_root_id(root));
}
goto out;
}
}
} else {
- ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
- if (ret)
+ ret = ref_get_fields(wc->log_leaf, ref_ptr, &name, &ref_index);
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to get ref details for inode %llu parent_objectid %llu root %llu",
+ btrfs_ino(inode),
+ parent_objectid,
+ btrfs_root_id(root));
goto out;
+ }
}
- ret = inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
- ref_index, &name);
+ ret = inode_in_dir(root, wc->subvol_path, btrfs_ino(dir),
+ btrfs_ino(inode), ref_index, &name);
if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+"failed to check if inode %llu is in dir %llu ref_index %llu name %.*s root %llu",
+ btrfs_ino(inode), btrfs_ino(dir),
+ ref_index, name.len, name.name,
+ btrfs_root_id(root));
goto out;
} else if (ret == 0) {
/*
@@ -1479,9 +1696,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* overwrite any existing back reference, and we don't
* want to create dangling pointers in the directory.
*/
- ret = __add_inode_ref(trans, root, path, log, dir, inode,
- inode_objectid, parent_objectid,
- ref_index, &name);
+ ret = __add_inode_ref(wc, dir, inode, ref_index, &name);
if (ret) {
if (ret == 1)
ret = 0;
@@ -1490,12 +1705,24 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
/* insert our name */
ret = btrfs_add_link(trans, dir, inode, &name, 0, ref_index);
- if (ret)
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+"failed to add link for inode %llu in dir %llu ref_index %llu name %.*s root %llu",
+ btrfs_ino(inode),
+ btrfs_ino(dir), ref_index,
+ name.len, name.name,
+ btrfs_root_id(root));
goto out;
+ }
ret = btrfs_update_inode(trans, inode);
- if (ret)
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to update inode %llu root %llu",
+ btrfs_ino(inode),
+ btrfs_root_id(root));
goto out;
+ }
}
/* Else, ret == 1, we already have a perfect match, we're done. */
@@ -1517,14 +1744,14 @@ next:
* dir index entries exist for a name but there is no inode reference
* item with the same name.
*/
- ret = unlink_old_inode_refs(trans, root, path, inode, eb, slot, key);
+ ret = unlink_old_inode_refs(wc, inode);
if (ret)
goto out;
/* finally write the back reference in the inode */
- ret = overwrite_item(trans, root, path, eb, slot, key);
+ ret = overwrite_item(wc);
out:
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
kfree(name.name);
if (dir)
iput(&dir->vfs_inode);
@@ -1642,26 +1869,22 @@ process_slot:
* number of back refs found. If it goes down to zero, the iput
* will free the inode.
*/
-static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
+static noinline int fixup_inode_link_count(struct walk_control *wc,
struct btrfs_inode *inode)
{
+ struct btrfs_trans_handle *trans = wc->trans;
struct btrfs_root *root = inode->root;
- struct btrfs_path *path;
int ret;
u64 nlink = 0;
const u64 ino = btrfs_ino(inode);
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = count_inode_refs(inode, path);
+ ret = count_inode_refs(inode, wc->subvol_path);
if (ret < 0)
goto out;
nlink = ret;
- ret = count_inode_extrefs(inode, path);
+ ret = count_inode_extrefs(inode, wc->subvol_path);
if (ret < 0)
goto out;
@@ -1680,7 +1903,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (inode->vfs_inode.i_nlink == 0) {
if (S_ISDIR(inode->vfs_inode.i_mode)) {
- ret = replay_dir_deletes(trans, root, NULL, path, ino, true);
+ ret = replay_dir_deletes(wc, ino, true);
if (ret)
goto out;
}
@@ -1690,13 +1913,11 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
}
out:
- btrfs_free_path(path);
+ btrfs_release_path(wc->subvol_path);
return ret;
}
-static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path)
+static noinline int fixup_inode_link_counts(struct walk_control *wc)
{
int ret;
struct btrfs_key key;
@@ -1705,48 +1926,50 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = (u64)-1;
while (1) {
+ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_root *root = wc->root;
struct btrfs_inode *inode;
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ ret = btrfs_search_slot(trans, root, &key, wc->subvol_path, -1, 1);
if (ret < 0)
break;
if (ret == 1) {
ret = 0;
- if (path->slots[0] == 0)
+ if (wc->subvol_path->slots[0] == 0)
break;
- path->slots[0]--;
+ wc->subvol_path->slots[0]--;
}
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ btrfs_item_key_to_cpu(wc->subvol_path->nodes[0], &key, wc->subvol_path->slots[0]);
if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID ||
key.type != BTRFS_ORPHAN_ITEM_KEY)
break;
- ret = btrfs_del_item(trans, root, path);
+ ret = btrfs_del_item(trans, root, wc->subvol_path);
if (ret)
break;
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
inode = btrfs_iget_logging(key.offset, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
break;
}
- ret = fixup_inode_link_count(trans, inode);
+ ret = fixup_inode_link_count(wc, inode);
iput(&inode->vfs_inode);
if (ret)
break;
/*
* fixup on a directory may create new entries,
- * make sure we always look for the highset possible
+ * make sure we always look for the highest possible
* offset
*/
key.offset = (u64)-1;
}
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
return ret;
}
@@ -1756,36 +1979,48 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
* count when replay is done. The link count is incremented here
* so the inode won't go away until we check it
*/
-static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 objectid)
+static noinline int link_to_fixup_dir(struct walk_control *wc, u64 objectid)
{
+ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_root *root = wc->root;
struct btrfs_key key;
int ret = 0;
struct btrfs_inode *inode;
struct inode *vfs_inode;
inode = btrfs_iget_logging(objectid, root);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup inode %llu root %llu",
+ objectid, btrfs_root_id(root));
+ return ret;
+ }
vfs_inode = &inode->vfs_inode;
key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = objectid;
- ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+ ret = btrfs_insert_empty_item(trans, root, wc->subvol_path, &key, 0);
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
if (ret == 0) {
if (!vfs_inode->i_nlink)
set_nlink(vfs_inode, 1);
else
inc_nlink(vfs_inode);
ret = btrfs_update_inode(trans, inode);
+ if (ret)
+ btrfs_abort_log_replay(wc, ret,
+ "failed to update inode %llu root %llu",
+ objectid, btrfs_root_id(root));
} else if (ret == -EEXIST) {
ret = 0;
+ } else {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to insert fixup item for inode %llu root %llu",
+ objectid, btrfs_root_id(root));
}
iput(vfs_inode);
@@ -1826,9 +2061,8 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
return ret;
}
-static int delete_conflicting_dir_entry(struct btrfs_trans_handle *trans,
+static int delete_conflicting_dir_entry(struct walk_control *wc,
struct btrfs_inode *dir,
- struct btrfs_path *path,
struct btrfs_dir_item *dst_di,
const struct btrfs_key *log_key,
u8 log_flags,
@@ -1836,12 +2070,12 @@ static int delete_conflicting_dir_entry(struct btrfs_trans_handle *trans,
{
struct btrfs_key found_key;
- btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key);
+ btrfs_dir_item_key_to_cpu(wc->subvol_path->nodes[0], dst_di, &found_key);
/* The existing dentry points to the same inode, don't delete it. */
if (found_key.objectid == log_key->objectid &&
found_key.type == log_key->type &&
found_key.offset == log_key->offset &&
- btrfs_dir_flags(path->nodes[0], dst_di) == log_flags)
+ btrfs_dir_flags(wc->subvol_path->nodes[0], dst_di) == log_flags)
return 1;
/*
@@ -1851,7 +2085,7 @@ static int delete_conflicting_dir_entry(struct btrfs_trans_handle *trans,
if (!exists)
return 0;
- return drop_one_dir_item(trans, path, dir, dst_di);
+ return drop_one_dir_item(wc, dir, dst_di);
}
/*
@@ -1870,13 +2104,10 @@ static int delete_conflicting_dir_entry(struct btrfs_trans_handle *trans,
* Returns < 0 on error, 0 if the name wasn't replayed (dentry points to a
* non-existing inode) and 1 if the name was replayed.
*/
-static noinline int replay_one_name(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *eb,
- struct btrfs_dir_item *di,
- struct btrfs_key *key)
+static noinline int replay_one_name(struct walk_control *wc, struct btrfs_dir_item *di)
{
+ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_root *root = wc->root;
struct fscrypt_str name = { 0 };
struct btrfs_dir_item *dir_dst_di;
struct btrfs_dir_item *index_dst_di;
@@ -1891,53 +2122,85 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
bool update_size = true;
bool name_added = false;
- dir = btrfs_iget_logging(key->objectid, root);
- if (IS_ERR(dir))
- return PTR_ERR(dir);
+ dir = btrfs_iget_logging(wc->log_key.objectid, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup dir inode %llu root %llu",
+ wc->log_key.objectid, btrfs_root_id(root));
+ return ret;
+ }
- ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
- if (ret)
+ ret = read_alloc_one_name(wc->log_leaf, di + 1,
+ btrfs_dir_name_len(wc->log_leaf, di), &name);
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to allocate name for dir %llu root %llu",
+ btrfs_ino(dir), btrfs_root_id(root));
goto out;
+ }
- log_flags = btrfs_dir_flags(eb, di);
- btrfs_dir_item_key_to_cpu(eb, di, &log_key);
- ret = btrfs_lookup_inode(trans, root, path, &log_key, 0);
- btrfs_release_path(path);
- if (ret < 0)
+ log_flags = btrfs_dir_flags(wc->log_leaf, di);
+ btrfs_dir_item_key_to_cpu(wc->log_leaf, di, &log_key);
+ ret = btrfs_lookup_inode(trans, root, wc->subvol_path, &log_key, 0);
+ btrfs_release_path(wc->subvol_path);
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup inode %llu root %llu",
+ log_key.objectid, btrfs_root_id(root));
goto out;
+ }
exists = (ret == 0);
ret = 0;
- dir_dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
- &name, 1);
+ dir_dst_di = btrfs_lookup_dir_item(trans, root, wc->subvol_path,
+ wc->log_key.objectid, &name, 1);
if (IS_ERR(dir_dst_di)) {
ret = PTR_ERR(dir_dst_di);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup dir item for dir %llu name %.*s root %llu",
+ wc->log_key.objectid, name.len, name.name,
+ btrfs_root_id(root));
goto out;
} else if (dir_dst_di) {
- ret = delete_conflicting_dir_entry(trans, dir, path, dir_dst_di,
+ ret = delete_conflicting_dir_entry(wc, dir, dir_dst_di,
&log_key, log_flags, exists);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to delete conflicting entry for dir %llu name %.*s root %llu",
+ btrfs_ino(dir), name.len, name.name,
+ btrfs_root_id(root));
goto out;
+ }
dir_dst_matches = (ret == 1);
}
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
- index_dst_di = btrfs_lookup_dir_index_item(trans, root, path,
- key->objectid, key->offset,
- &name, 1);
+ index_dst_di = btrfs_lookup_dir_index_item(trans, root, wc->subvol_path,
+ wc->log_key.objectid,
+ wc->log_key.offset, &name, 1);
if (IS_ERR(index_dst_di)) {
ret = PTR_ERR(index_dst_di);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup dir index item for dir %llu name %.*s root %llu",
+ wc->log_key.objectid, name.len, name.name,
+ btrfs_root_id(root));
goto out;
} else if (index_dst_di) {
- ret = delete_conflicting_dir_entry(trans, dir, path, index_dst_di,
+ ret = delete_conflicting_dir_entry(wc, dir, index_dst_di,
&log_key, log_flags, exists);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to delete conflicting entry for dir %llu name %.*s root %llu",
+ btrfs_ino(dir), name.len, name.name,
+ btrfs_root_id(root));
goto out;
+ }
index_dst_matches = (ret == 1);
}
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
if (dir_dst_matches && index_dst_matches) {
ret = 0;
@@ -1951,9 +2214,13 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
*/
search_key.objectid = log_key.objectid;
search_key.type = BTRFS_INODE_REF_KEY;
- search_key.offset = key->objectid;
+ search_key.offset = wc->log_key.objectid;
ret = backref_in_log(root->log_root, &search_key, 0, &name);
if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+"failed to check if ref item is logged for inode %llu dir %llu name %.*s root %llu",
+ search_key.objectid, btrfs_ino(dir),
+ name.len, name.name, btrfs_root_id(root));
goto out;
} else if (ret) {
/* The dentry will be added later. */
@@ -1964,9 +2231,13 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
search_key.objectid = log_key.objectid;
search_key.type = BTRFS_INODE_EXTREF_KEY;
- search_key.offset = key->objectid;
- ret = backref_in_log(root->log_root, &search_key, key->objectid, &name);
+ search_key.offset = btrfs_extref_hash(wc->log_key.objectid, name.name, name.len);
+ ret = backref_in_log(root->log_root, &search_key, wc->log_key.objectid, &name);
if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+"failed to check if extref item is logged for inode %llu dir %llu name %.*s root %llu",
+ search_key.objectid, btrfs_ino(dir),
+ name.len, name.name, btrfs_root_id(root));
goto out;
} else if (ret) {
/* The dentry will be added later. */
@@ -1974,11 +2245,15 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
update_size = false;
goto out;
}
- btrfs_release_path(path);
- ret = insert_one_name(trans, root, key->objectid, key->offset,
+ ret = insert_one_name(trans, root, wc->log_key.objectid, wc->log_key.offset,
&name, &log_key);
- if (ret && ret != -ENOENT && ret != -EEXIST)
+ if (ret && ret != -ENOENT && ret != -EEXIST) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to insert name %.*s for inode %llu dir %llu root %llu",
+ name.len, name.name, log_key.objectid,
+ btrfs_ino(dir), btrfs_root_id(root));
goto out;
+ }
if (!ret)
name_added = true;
update_size = false;
@@ -1988,6 +2263,10 @@ out:
if (!ret && update_size) {
btrfs_i_size_write(dir, dir->vfs_inode.i_size + name.len * 2);
ret = btrfs_update_inode(trans, dir);
+ if (ret)
+ btrfs_abort_log_replay(wc, ret,
+ "failed to update dir inode %llu root %llu",
+ btrfs_ino(dir), btrfs_root_id(root));
}
kfree(name.name);
iput(&dir->vfs_inode);
@@ -1997,20 +2276,16 @@ out:
}
/* Replay one dir item from a BTRFS_DIR_INDEX_KEY key. */
-static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
+static noinline int replay_one_dir_item(struct walk_control *wc)
{
int ret;
struct btrfs_dir_item *di;
/* We only log dir index keys, which only contain a single dir item. */
- ASSERT(key->type == BTRFS_DIR_INDEX_KEY);
+ ASSERT(wc->log_key.type == BTRFS_DIR_INDEX_KEY);
- di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
- ret = replay_one_name(trans, root, path, eb, di, key);
+ di = btrfs_item_ptr(wc->log_leaf, wc->log_slot, struct btrfs_dir_item);
+ ret = replay_one_name(wc, di);
if (ret < 0)
return ret;
@@ -2040,17 +2315,11 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
* to ever delete the parent directory has it would result in stale
* dentries that can never be deleted.
*/
- if (ret == 1 && btrfs_dir_ftype(eb, di) != BTRFS_FT_DIR) {
- struct btrfs_path *fixup_path;
+ if (ret == 1 && btrfs_dir_ftype(wc->log_leaf, di) != BTRFS_FT_DIR) {
struct btrfs_key di_key;
- fixup_path = btrfs_alloc_path();
- if (!fixup_path)
- return -ENOMEM;
-
- btrfs_dir_item_key_to_cpu(eb, di, &di_key);
- ret = link_to_fixup_dir(trans, root, fixup_path, di_key.objectid);
- btrfs_free_path(fixup_path);
+ btrfs_dir_item_key_to_cpu(wc->log_leaf, di, &di_key);
+ ret = link_to_fixup_dir(wc, di_key.objectid);
}
return ret;
@@ -2143,13 +2412,13 @@ out:
* item is not in the log, the item is removed and the inode it points
* to is unlinked
*/
-static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *log,
- struct btrfs_path *path,
+static noinline int check_item_in_log(struct walk_control *wc,
struct btrfs_path *log_path,
struct btrfs_inode *dir,
- struct btrfs_key *dir_key)
+ struct btrfs_key *dir_key,
+ bool force_remove)
{
+ struct btrfs_trans_handle *trans = wc->trans;
struct btrfs_root *root = dir->root;
int ret;
struct extent_buffer *eb;
@@ -2167,21 +2436,31 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
*/
ASSERT(dir_key->type == BTRFS_DIR_INDEX_KEY);
- eb = path->nodes[0];
- slot = path->slots[0];
+ eb = wc->subvol_path->nodes[0];
+ slot = wc->subvol_path->slots[0];
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
- if (ret)
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to allocate name for dir %llu index %llu root %llu",
+ btrfs_ino(dir), dir_key->offset,
+ btrfs_root_id(root));
goto out;
+ }
- if (log) {
+ if (!force_remove) {
struct btrfs_dir_item *log_di;
- log_di = btrfs_lookup_dir_index_item(trans, log, log_path,
+ log_di = btrfs_lookup_dir_index_item(trans, wc->log, log_path,
dir_key->objectid,
dir_key->offset, &name, 0);
if (IS_ERR(log_di)) {
ret = PTR_ERR(log_di);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup dir index item for dir %llu index %llu name %.*s root %llu",
+ btrfs_ino(dir), dir_key->offset,
+ name.len, name.name,
+ btrfs_root_id(root));
goto out;
} else if (log_di) {
/* The dentry exists in the log, we have nothing to do. */
@@ -2191,28 +2470,31 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
}
btrfs_dir_item_key_to_cpu(eb, di, &location);
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
btrfs_release_path(log_path);
inode = btrfs_iget_logging(location.objectid, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
inode = NULL;
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup inode %llu root %llu",
+ location.objectid, btrfs_root_id(root));
goto out;
}
- ret = link_to_fixup_dir(trans, root, path, location.objectid);
+ ret = link_to_fixup_dir(wc, location.objectid);
if (ret)
goto out;
inc_nlink(&inode->vfs_inode);
- ret = unlink_inode_for_log_replay(trans, dir, inode, &name);
+ ret = unlink_inode_for_log_replay(wc, dir, inode, &name);
/*
* Unlike dir item keys, dir index keys can only have one name (entry) in
* them, as there are no key collisions since each key has a unique offset
* (an index number), so we're done.
*/
out:
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
btrfs_release_path(log_path);
kfree(name.name);
if (inode)
@@ -2220,59 +2502,67 @@ out:
return ret;
}
-static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_root *log,
- struct btrfs_path *path,
- const u64 ino)
+static int replay_xattr_deletes(struct walk_control *wc)
{
+ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_root *root = wc->root;
+ struct btrfs_root *log = wc->log;
struct btrfs_key search_key;
- struct btrfs_path *log_path;
- int i;
+ BTRFS_PATH_AUTO_FREE(log_path);
+ const u64 ino = wc->log_key.objectid;
int nritems;
int ret;
log_path = btrfs_alloc_path();
- if (!log_path)
+ if (!log_path) {
+ btrfs_abort_log_replay(wc, -ENOMEM, "failed to allocate path");
return -ENOMEM;
+ }
search_key.objectid = ino;
search_key.type = BTRFS_XATTR_ITEM_KEY;
search_key.offset = 0;
again:
- ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
- if (ret < 0)
+ ret = btrfs_search_slot(NULL, root, &search_key, wc->subvol_path, 0, 0);
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to search xattrs for inode %llu root %llu",
+ ino, btrfs_root_id(root));
goto out;
+ }
process_leaf:
- nritems = btrfs_header_nritems(path->nodes[0]);
- for (i = path->slots[0]; i < nritems; i++) {
+ nritems = btrfs_header_nritems(wc->subvol_path->nodes[0]);
+ for (int i = wc->subvol_path->slots[0]; i < nritems; i++) {
struct btrfs_key key;
struct btrfs_dir_item *di;
struct btrfs_dir_item *log_di;
u32 total_size;
u32 cur;
- btrfs_item_key_to_cpu(path->nodes[0], &key, i);
+ btrfs_item_key_to_cpu(wc->subvol_path->nodes[0], &key, i);
if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
ret = 0;
goto out;
}
- di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
- total_size = btrfs_item_size(path->nodes[0], i);
+ di = btrfs_item_ptr(wc->subvol_path->nodes[0], i, struct btrfs_dir_item);
+ total_size = btrfs_item_size(wc->subvol_path->nodes[0], i);
cur = 0;
while (cur < total_size) {
- u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
- u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
+ u16 name_len = btrfs_dir_name_len(wc->subvol_path->nodes[0], di);
+ u16 data_len = btrfs_dir_data_len(wc->subvol_path->nodes[0], di);
u32 this_len = sizeof(*di) + name_len + data_len;
char *name;
name = kmalloc(name_len, GFP_NOFS);
if (!name) {
ret = -ENOMEM;
+ btrfs_abort_log_replay(wc, ret,
+ "failed to allocate memory for name of length %u",
+ name_len);
goto out;
}
- read_extent_buffer(path->nodes[0], name,
+ read_extent_buffer(wc->subvol_path->nodes[0], name,
(unsigned long)(di + 1), name_len);
log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
@@ -2280,40 +2570,59 @@ process_leaf:
btrfs_release_path(log_path);
if (!log_di) {
/* Doesn't exist in log tree, so delete it. */
- btrfs_release_path(path);
- di = btrfs_lookup_xattr(trans, root, path, ino,
+ btrfs_release_path(wc->subvol_path);
+ di = btrfs_lookup_xattr(trans, root, wc->subvol_path, ino,
name, name_len, -1);
- kfree(name);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup xattr with name %.*s for inode %llu root %llu",
+ name_len, name, ino,
+ btrfs_root_id(root));
+ kfree(name);
goto out;
}
ASSERT(di);
ret = btrfs_delete_one_dir_name(trans, root,
- path, di);
- if (ret)
+ wc->subvol_path, di);
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to delete xattr with name %.*s for inode %llu root %llu",
+ name_len, name, ino,
+ btrfs_root_id(root));
+ kfree(name);
goto out;
- btrfs_release_path(path);
+ }
+ btrfs_release_path(wc->subvol_path);
+ kfree(name);
search_key = key;
goto again;
}
- kfree(name);
if (IS_ERR(log_di)) {
ret = PTR_ERR(log_di);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup xattr in log tree with name %.*s for inode %llu root %llu",
+ name_len, name, ino,
+ btrfs_root_id(root));
+ kfree(name);
goto out;
}
+ kfree(name);
cur += this_len;
di = (struct btrfs_dir_item *)((char *)di + this_len);
}
}
- ret = btrfs_next_leaf(root, path);
+ ret = btrfs_next_leaf(root, wc->subvol_path);
if (ret > 0)
ret = 0;
else if (ret == 0)
goto process_leaf;
+ else
+ btrfs_abort_log_replay(wc, ret,
+ "failed to get next leaf in subvolume root %llu",
+ btrfs_root_id(root));
out:
- btrfs_free_path(log_path);
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
return ret;
}
@@ -2328,12 +2637,11 @@ out:
* Anything we don't find in the log is unlinked and removed from the
* directory.
*/
-static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_root *log,
- struct btrfs_path *path,
+static noinline int replay_dir_deletes(struct walk_control *wc,
u64 dirid, bool del_all)
{
+ struct btrfs_root *root = wc->root;
+ struct btrfs_root *log = (del_all ? NULL : wc->log);
u64 range_start;
u64 range_end;
int ret = 0;
@@ -2345,8 +2653,10 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
dir_key.objectid = dirid;
dir_key.type = BTRFS_DIR_INDEX_KEY;
log_path = btrfs_alloc_path();
- if (!log_path)
+ if (!log_path) {
+ btrfs_abort_log_replay(wc, -ENOMEM, "failed to allocate path");
return -ENOMEM;
+ }
dir = btrfs_iget_logging(dirid, root);
/*
@@ -2358,6 +2668,10 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
ret = PTR_ERR(dir);
if (ret == -ENOENT)
ret = 0;
+ else
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup dir inode %llu root %llu",
+ dirid, btrfs_root_id(root));
return ret;
}
@@ -2367,32 +2681,46 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
if (del_all)
range_end = (u64)-1;
else {
- ret = find_dir_range(log, path, dirid,
+ ret = find_dir_range(log, wc->subvol_path, dirid,
&range_start, &range_end);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to find range for dir %llu in log tree root %llu",
+ dirid, btrfs_root_id(root));
goto out;
- else if (ret > 0)
+ } else if (ret > 0) {
break;
+ }
}
dir_key.offset = range_start;
while (1) {
int nritems;
- ret = btrfs_search_slot(NULL, root, &dir_key, path,
- 0, 0);
- if (ret < 0)
+ ret = btrfs_search_slot(NULL, root, &dir_key,
+ wc->subvol_path, 0, 0);
+ if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to search root %llu for key (%llu %u %llu)",
+ btrfs_root_id(root),
+ dir_key.objectid, dir_key.type,
+ dir_key.offset);
goto out;
+ }
- nritems = btrfs_header_nritems(path->nodes[0]);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 1)
+ nritems = btrfs_header_nritems(wc->subvol_path->nodes[0]);
+ if (wc->subvol_path->slots[0] >= nritems) {
+ ret = btrfs_next_leaf(root, wc->subvol_path);
+ if (ret == 1) {
break;
- else if (ret < 0)
+ } else if (ret < 0) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to get next leaf in subvolume root %llu",
+ btrfs_root_id(root));
goto out;
+ }
}
- btrfs_item_key_to_cpu(path->nodes[0], &found_key,
- path->slots[0]);
+ btrfs_item_key_to_cpu(wc->subvol_path->nodes[0], &found_key,
+ wc->subvol_path->slots[0]);
if (found_key.objectid != dirid ||
found_key.type != dir_key.type) {
ret = 0;
@@ -2402,23 +2730,21 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
if (found_key.offset > range_end)
break;
- ret = check_item_in_log(trans, log, path,
- log_path, dir,
- &found_key);
+ ret = check_item_in_log(wc, log_path, dir, &found_key, del_all);
if (ret)
goto out;
if (found_key.offset == (u64)-1)
break;
dir_key.offset = found_key.offset + 1;
}
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
if (range_end == (u64)-1)
break;
range_start = range_end + 1;
}
ret = 0;
out:
- btrfs_release_path(path);
+ btrfs_release_path(wc->subvol_path);
btrfs_free_path(log_path);
iput(&dir->vfs_inode);
return ret;
@@ -2435,7 +2761,7 @@ out:
* only in the log (references come from either directory items or inode
* back refs).
*/
-static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
+static int replay_one_buffer(struct extent_buffer *eb,
struct walk_control *wc, u64 gen, int level)
{
int nritems;
@@ -2443,33 +2769,44 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
.transid = gen,
.level = level
};
- struct btrfs_path *path;
- struct btrfs_root *root = wc->replay_dest;
- struct btrfs_key key;
- int i;
+ struct btrfs_root *root = wc->root;
+ struct btrfs_trans_handle *trans = wc->trans;
int ret;
- ret = btrfs_read_extent_buffer(eb, &check);
- if (ret)
- return ret;
-
- level = btrfs_header_level(eb);
-
if (level != 0)
return 0;
- path = btrfs_alloc_path();
- if (!path)
+ /*
+ * Set to NULL since it was not yet read and in case we abort log replay
+ * on error, we have no valid log tree leaf to dump.
+ */
+ wc->log_leaf = NULL;
+ ret = btrfs_read_extent_buffer(eb, &check);
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to read log tree leaf %llu for root %llu",
+ eb->start, btrfs_root_id(root));
+ return ret;
+ }
+
+ ASSERT(wc->subvol_path == NULL);
+ wc->subvol_path = btrfs_alloc_path();
+ if (!wc->subvol_path) {
+ btrfs_abort_log_replay(wc, -ENOMEM, "failed to allocate path");
return -ENOMEM;
+ }
+
+ wc->log_leaf = eb;
nritems = btrfs_header_nritems(eb);
- for (i = 0; i < nritems; i++) {
+ for (wc->log_slot = 0; wc->log_slot < nritems; wc->log_slot++) {
struct btrfs_inode_item *inode_item;
- btrfs_item_key_to_cpu(eb, &key, i);
+ btrfs_item_key_to_cpu(eb, &wc->log_key, wc->log_slot);
- if (key.type == BTRFS_INODE_ITEM_KEY) {
- inode_item = btrfs_item_ptr(eb, i, struct btrfs_inode_item);
+ if (wc->log_key.type == BTRFS_INODE_ITEM_KEY) {
+ inode_item = btrfs_item_ptr(eb, wc->log_slot,
+ struct btrfs_inode_item);
/*
* An inode with no links is either:
*
@@ -2498,22 +2835,20 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
}
/* Inode keys are done during the first stage. */
- if (key.type == BTRFS_INODE_ITEM_KEY &&
+ if (wc->log_key.type == BTRFS_INODE_ITEM_KEY &&
wc->stage == LOG_WALK_REPLAY_INODES) {
u32 mode;
- ret = replay_xattr_deletes(wc->trans, root, log, path, key.objectid);
+ ret = replay_xattr_deletes(wc);
if (ret)
break;
mode = btrfs_inode_mode(eb, inode_item);
if (S_ISDIR(mode)) {
- ret = replay_dir_deletes(wc->trans, root, log, path,
- key.objectid, false);
+ ret = replay_dir_deletes(wc, wc->log_key.objectid, false);
if (ret)
break;
}
- ret = overwrite_item(wc->trans, root, path,
- eb, i, &key);
+ ret = overwrite_item(wc);
if (ret)
break;
@@ -2530,9 +2865,13 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct btrfs_inode *inode;
u64 from;
- inode = btrfs_iget_logging(key.objectid, root);
+ inode = btrfs_iget_logging(wc->log_key.objectid, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
+ btrfs_abort_log_replay(wc, ret,
+ "failed to lookup inode %llu root %llu",
+ wc->log_key.objectid,
+ btrfs_root_id(root));
break;
}
from = ALIGN(i_size_read(&inode->vfs_inode),
@@ -2540,21 +2879,31 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
drop_args.start = from;
drop_args.end = (u64)-1;
drop_args.drop_cache = true;
- ret = btrfs_drop_extents(wc->trans, root, inode,
- &drop_args);
- if (!ret) {
+ drop_args.path = wc->subvol_path;
+ ret = btrfs_drop_extents(trans, root, inode, &drop_args);
+ if (ret) {
+ btrfs_abort_log_replay(wc, ret,
+ "failed to drop extents for inode %llu root %llu offset %llu",
+ btrfs_ino(inode),
+ btrfs_root_id(root),
+ from);
+ } else {
inode_sub_bytes(&inode->vfs_inode,
drop_args.bytes_found);
/* Update the inode's nbytes. */
- ret = btrfs_update_inode(wc->trans, inode);
+ ret = btrfs_update_inode(trans, inode);
+ if (ret)
+ btrfs_abort_log_replay(wc, ret,
+ "failed to update inode %llu root %llu",
+ btrfs_ino(inode),
+ btrfs_root_id(root));
}
iput(&inode->vfs_inode);
if (ret)
break;
}
- ret = link_to_fixup_dir(wc->trans, root,
- path, key.objectid);
+ ret = link_to_fixup_dir(wc, wc->log_key.objectid);
if (ret)
break;
}
@@ -2562,10 +2911,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
if (wc->ignore_cur_inode)
continue;
- if (key.type == BTRFS_DIR_INDEX_KEY &&
+ if (wc->log_key.type == BTRFS_DIR_INDEX_KEY &&
wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
- ret = replay_one_dir_item(wc->trans, root, path,
- eb, i, &key);
+ ret = replay_one_dir_item(wc);
if (ret)
break;
}
@@ -2574,20 +2922,17 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
continue;
/* these keys are simply copied */
- if (key.type == BTRFS_XATTR_ITEM_KEY) {
- ret = overwrite_item(wc->trans, root, path,
- eb, i, &key);
+ if (wc->log_key.type == BTRFS_XATTR_ITEM_KEY) {
+ ret = overwrite_item(wc);
if (ret)
break;
- } else if (key.type == BTRFS_INODE_REF_KEY ||
- key.type == BTRFS_INODE_EXTREF_KEY) {
- ret = add_inode_ref(wc->trans, root, log, path,
- eb, i, &key);
+ } else if (wc->log_key.type == BTRFS_INODE_REF_KEY ||
+ wc->log_key.type == BTRFS_INODE_EXTREF_KEY) {
+ ret = add_inode_ref(wc);
if (ret)
break;
- } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
- ret = replay_one_extent(wc->trans, root, path,
- eb, i, &key);
+ } else if (wc->log_key.type == BTRFS_EXTENT_DATA_KEY) {
+ ret = replay_one_extent(wc);
if (ret)
break;
}
@@ -2598,55 +2943,55 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
* older kernel with such keys, ignore them.
*/
}
- btrfs_free_path(path);
+ btrfs_free_path(wc->subvol_path);
+ wc->subvol_path = NULL;
return ret;
}
-/*
- * Correctly adjust the reserved bytes occupied by a log tree extent buffer
- */
-static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
-{
- struct btrfs_block_group *cache;
-
- cache = btrfs_lookup_block_group(fs_info, start);
- if (!cache) {
- btrfs_err(fs_info, "unable to find block group for %llu", start);
- return -ENOENT;
- }
-
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- cache->reserved -= fs_info->nodesize;
- cache->space_info->bytes_reserved -= fs_info->nodesize;
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
-
- btrfs_put_block_group(cache);
-
- return 0;
-}
-
static int clean_log_buffer(struct btrfs_trans_handle *trans,
struct extent_buffer *eb)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ struct btrfs_block_group *bg;
+
btrfs_tree_lock(eb);
btrfs_clear_buffer_dirty(trans, eb);
wait_on_extent_buffer_writeback(eb);
btrfs_tree_unlock(eb);
- if (trans)
- return btrfs_pin_reserved_extent(trans, eb);
+ if (trans) {
+ int ret;
- return unaccount_log_buffer(eb->fs_info, eb->start);
+ ret = btrfs_pin_reserved_extent(trans, eb);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ bg = btrfs_lookup_block_group(fs_info, eb->start);
+ if (!bg) {
+ btrfs_err(fs_info, "unable to find block group for %llu", eb->start);
+ btrfs_handle_fs_error(fs_info, -ENOENT, NULL);
+ return -ENOENT;
+ }
+
+ spin_lock(&bg->space_info->lock);
+ spin_lock(&bg->lock);
+ bg->reserved -= fs_info->nodesize;
+ bg->space_info->bytes_reserved -= fs_info->nodesize;
+ spin_unlock(&bg->lock);
+ spin_unlock(&bg->space_info->lock);
+
+ btrfs_put_block_group(bg);
+
+ return 0;
}
-static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int *level,
- struct walk_control *wc)
+static noinline int walk_down_log_tree(struct btrfs_path *path, int *level,
+ struct walk_control *wc)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_fs_info *fs_info = wc->log->fs_info;
u64 bytenr;
u64 ptr_gen;
struct extent_buffer *next;
@@ -2674,12 +3019,17 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
next = btrfs_find_create_tree_block(fs_info, bytenr,
btrfs_header_owner(cur),
*level - 1);
- if (IS_ERR(next))
- return PTR_ERR(next);
+ if (IS_ERR(next)) {
+ ret = PTR_ERR(next);
+ if (trans)
+ btrfs_abort_transaction(trans, ret);
+ else
+ btrfs_handle_fs_error(fs_info, ret, NULL);
+ return ret;
+ }
if (*level == 1) {
- ret = wc->process_func(root, next, wc, ptr_gen,
- *level - 1);
+ ret = wc->process_func(next, wc, ptr_gen, *level - 1);
if (ret) {
free_extent_buffer(next);
return ret;
@@ -2690,6 +3040,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
ret = btrfs_read_extent_buffer(next, &check);
if (ret) {
free_extent_buffer(next);
+ if (trans)
+ btrfs_abort_transaction(trans, ret);
+ else
+ btrfs_handle_fs_error(fs_info, ret, NULL);
return ret;
}
@@ -2705,6 +3059,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
ret = btrfs_read_extent_buffer(next, &check);
if (ret) {
free_extent_buffer(next);
+ if (trans)
+ btrfs_abort_transaction(trans, ret);
+ else
+ btrfs_handle_fs_error(fs_info, ret, NULL);
return ret;
}
@@ -2721,10 +3079,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
return 0;
}
-static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int *level,
- struct walk_control *wc)
+static noinline int walk_up_log_tree(struct btrfs_path *path, int *level,
+ struct walk_control *wc)
{
int i;
int slot;
@@ -2738,14 +3094,14 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(*level == 0);
return 0;
} else {
- ret = wc->process_func(root, path->nodes[*level], wc,
+ ret = wc->process_func(path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]),
*level);
if (ret)
return ret;
if (wc->free) {
- ret = clean_log_buffer(trans, path->nodes[*level]);
+ ret = clean_log_buffer(wc->trans, path->nodes[*level]);
if (ret)
return ret;
}
@@ -2762,13 +3118,13 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
* the tree freeing any blocks that have a ref count of zero after being
* decremented.
*/
-static int walk_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *log, struct walk_control *wc)
+static int walk_log_tree(struct walk_control *wc)
{
+ struct btrfs_root *log = wc->log;
int ret = 0;
int wret;
int level;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int orig_level;
path = btrfs_alloc_path();
@@ -2782,36 +3138,30 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
path->slots[level] = 0;
while (1) {
- wret = walk_down_log_tree(trans, log, path, &level, wc);
+ wret = walk_down_log_tree(path, &level, wc);
if (wret > 0)
break;
- if (wret < 0) {
- ret = wret;
- goto out;
- }
+ if (wret < 0)
+ return wret;
- wret = walk_up_log_tree(trans, log, path, &level, wc);
+ wret = walk_up_log_tree(path, &level, wc);
if (wret > 0)
break;
- if (wret < 0) {
- ret = wret;
- goto out;
- }
+ if (wret < 0)
+ return wret;
}
/* was the root node processed? if not, catch it here */
if (path->nodes[orig_level]) {
- ret = wc->process_func(log, path->nodes[orig_level], wc,
+ ret = wc->process_func(path->nodes[orig_level], wc,
btrfs_header_generation(path->nodes[orig_level]),
orig_level);
if (ret)
- goto out;
+ return ret;
if (wc->free)
- ret = clean_log_buffer(trans, path->nodes[orig_level]);
+ ret = clean_log_buffer(wc->trans, path->nodes[orig_level]);
}
-out:
- btrfs_free_path(path);
return ret;
}
@@ -3220,7 +3570,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
btrfs_set_super_log_root_level(fs_info->super_for_commit, log_root_level);
ret = write_all_supers(fs_info, 1);
mutex_unlock(&fs_info->tree_log_mutex);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_set_log_full_commit(trans);
btrfs_abort_transaction(trans, ret);
goto out_wake_log_root;
@@ -3272,12 +3622,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
{
int ret;
struct walk_control wc = {
- .free = 1,
- .process_func = process_one_buffer
+ .free = true,
+ .process_func = process_one_buffer,
+ .log = log,
+ .trans = trans,
};
if (log->node) {
- ret = walk_log_tree(trans, log, &wc);
+ ret = walk_log_tree(&wc);
if (ret) {
/*
* We weren't able to traverse the entire log tree, the
@@ -3476,7 +3828,7 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
/*
* The inode was previously logged and then evicted, set logged_trans to
- * the current transacion's ID, to avoid future tree searches as long as
+ * the current transaction's ID, to avoid future tree searches as long as
* the inode is not evicted again.
*/
spin_lock(&inode->lock);
@@ -3547,13 +3899,13 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
const struct fscrypt_str *name,
struct btrfs_inode *dir, u64 index)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int ret;
ret = inode_logged(trans, dir, NULL);
if (ret == 0)
return;
- else if (ret < 0) {
+ if (ret < 0) {
btrfs_set_log_full_commit(trans);
return;
}
@@ -3567,7 +3919,7 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
ret = join_running_log_trans(root);
ASSERT(ret == 0, "join_running_log_trans() ret=%d", ret);
if (WARN_ON(ret))
- goto out;
+ return;
mutex_lock(&dir->log_mutex);
@@ -3577,8 +3929,6 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
if (ret < 0)
btrfs_set_log_full_commit(trans);
btrfs_end_log_trans(root);
-out:
- btrfs_free_path(path);
}
/* see comments for btrfs_del_dir_entries_in_log */
@@ -3691,8 +4041,7 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
struct btrfs_key *ins_keys;
u32 *ins_sizes;
- ins_data = kmalloc(count * sizeof(u32) +
- count * sizeof(struct btrfs_key), GFP_NOFS);
+ ins_data = kmalloc_array(count, sizeof(u32) + sizeof(struct btrfs_key), GFP_NOFS);
if (!ins_data)
return -ENOMEM;
@@ -4255,7 +4604,7 @@ static int truncate_inode_items(struct btrfs_trans_handle *trans,
static void fill_inode_item(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf,
struct btrfs_inode_item *item,
- struct inode *inode, int log_inode_only,
+ struct inode *inode, bool log_inode_only,
u64 logged_isize)
{
u64 flags;
@@ -4351,7 +4700,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
fill_inode_item(trans, path->nodes[0], inode_item, &inode->vfs_inode,
- 0, 0);
+ false, 0);
btrfs_release_path(path);
return 0;
}
@@ -4455,8 +4804,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
src = src_path->nodes[0];
- ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
- nr * sizeof(u32), GFP_NOFS);
+ ins_data = kmalloc_array(nr, sizeof(struct btrfs_key) + sizeof(u32), GFP_NOFS);
if (!ins_data)
return -ENOMEM;
@@ -4857,7 +5205,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
struct btrfs_key key;
const u64 i_size = i_size_read(&inode->vfs_inode);
const u64 ino = btrfs_ino(inode);
- struct btrfs_path *dst_path = NULL;
+ BTRFS_PATH_AUTO_FREE(dst_path);
bool dropped_extents = false;
u64 truncate_offset = i_size;
struct extent_buffer *leaf;
@@ -4975,7 +5323,6 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
start_slot, ins_nr, 1, 0, ctx);
out:
btrfs_release_path(path);
- btrfs_free_path(dst_path);
return ret;
}
@@ -5348,7 +5695,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
u64 *other_ino, u64 *other_parent)
{
int ret;
- struct btrfs_path *search_path;
+ BTRFS_PATH_AUTO_FREE(search_path);
char *name = NULL;
u32 name_len = 0;
u32 item_size = btrfs_item_size(eb, slot);
@@ -5433,7 +5780,6 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
}
ret = 0;
out:
- btrfs_free_path(search_path);
kfree(name);
return ret;
}
@@ -6161,8 +6507,7 @@ static int log_delayed_insertion_items(struct btrfs_trans_handle *trans,
if (!first)
return 0;
- ins_data = kmalloc(max_batch_size * sizeof(u32) +
- max_batch_size * sizeof(struct btrfs_key), GFP_NOFS);
+ ins_data = kmalloc_array(max_batch_size, sizeof(u32) + sizeof(struct btrfs_key), GFP_NOFS);
if (!ins_data)
return -ENOMEM;
ins_sizes = (u32 *)ins_data;
@@ -6816,7 +7161,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
int ret;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_root *root = inode->root;
const u64 ino = btrfs_ino(inode);
@@ -6832,7 +7177,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
- goto out;
+ return ret;
while (true) {
struct extent_buffer *leaf = path->nodes[0];
@@ -6844,8 +7189,8 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out;
- else if (ret > 0)
+ return ret;
+ if (ret > 0)
break;
continue;
}
@@ -6903,10 +7248,8 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
* at both parents and the old parent B would still
* exist.
*/
- if (IS_ERR(dir_inode)) {
- ret = PTR_ERR(dir_inode);
- goto out;
- }
+ if (IS_ERR(dir_inode))
+ return PTR_ERR(dir_inode);
if (!need_log_inode(trans, dir_inode)) {
btrfs_add_delayed_iput(dir_inode);
@@ -6919,14 +7262,11 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
ret = log_new_dir_dentries(trans, dir_inode, ctx);
btrfs_add_delayed_iput(dir_inode);
if (ret)
- goto out;
+ return ret;
}
path->slots[0]++;
}
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
static int log_new_ancestors(struct btrfs_trans_handle *trans,
@@ -7037,7 +7377,7 @@ static int log_all_new_ancestors(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = inode->root;
const u64 ino = btrfs_ino(inode);
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key search_key;
int ret;
@@ -7058,7 +7398,7 @@ static int log_all_new_ancestors(struct btrfs_trans_handle *trans,
again:
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret < 0)
- goto out;
+ return ret;
if (ret == 0)
path->slots[0]++;
@@ -7070,8 +7410,8 @@ again:
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out;
- else if (ret > 0)
+ return ret;
+ if (ret > 0)
break;
continue;
}
@@ -7088,10 +7428,8 @@ again:
* this loop, etc). So just return some error to fallback to
* a transaction commit.
*/
- if (found_key.type == BTRFS_INODE_EXTREF_KEY) {
- ret = -EMLINK;
- goto out;
- }
+ if (found_key.type == BTRFS_INODE_EXTREF_KEY)
+ return -EMLINK;
/*
* Logging ancestors needs to do more searches on the fs/subvol
@@ -7103,14 +7441,11 @@ again:
ret = log_new_ancestors(trans, root, path, ctx);
if (ret)
- goto out;
+ return ret;
btrfs_release_path(path);
goto again;
}
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
/*
@@ -7290,10 +7625,12 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
}
wc.trans = trans;
- wc.pin = 1;
+ wc.pin = true;
+ wc.log = log_root_tree;
- ret = walk_log_tree(trans, log_root_tree, &wc);
- if (ret) {
+ ret = walk_log_tree(&wc);
+ wc.log = NULL;
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error;
}
@@ -7304,12 +7641,11 @@ again:
key.offset = (u64)-1;
while (1) {
- struct btrfs_root *log;
struct btrfs_key found_key;
ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto error;
}
@@ -7324,20 +7660,19 @@ again:
if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
break;
- log = btrfs_read_tree_root(log_root_tree, &found_key);
- if (IS_ERR(log)) {
- ret = PTR_ERR(log);
+ wc.log = btrfs_read_tree_root(log_root_tree, &found_key);
+ if (IS_ERR(wc.log)) {
+ ret = PTR_ERR(wc.log);
+ wc.log = NULL;
btrfs_abort_transaction(trans, ret);
goto error;
}
- wc.replay_dest = btrfs_get_fs_root(fs_info, found_key.offset,
- true);
- if (IS_ERR(wc.replay_dest)) {
- ret = PTR_ERR(wc.replay_dest);
- wc.replay_dest = NULL;
- if (ret != -ENOENT) {
- btrfs_put_root(log);
+ wc.root = btrfs_get_fs_root(fs_info, found_key.offset, true);
+ if (IS_ERR(wc.root)) {
+ ret = PTR_ERR(wc.root);
+ wc.root = NULL;
+ if (unlikely(ret != -ENOENT)) {
btrfs_abort_transaction(trans, ret);
goto error;
}
@@ -7353,33 +7688,34 @@ again:
* block from being modified, and we'll just bail for
* each subsequent pass.
*/
- ret = btrfs_pin_extent_for_log_replay(trans, log->node);
- if (ret) {
- btrfs_put_root(log);
+ ret = btrfs_pin_extent_for_log_replay(trans, wc.log->node);
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error;
}
goto next;
}
- wc.replay_dest->log_root = log;
- ret = btrfs_record_root_in_trans(trans, wc.replay_dest);
- if (ret) {
+ wc.root->log_root = wc.log;
+ ret = btrfs_record_root_in_trans(trans, wc.root);
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto next;
}
- ret = walk_log_tree(trans, log, &wc);
- if (ret) {
+ ret = walk_log_tree(&wc);
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto next;
}
if (wc.stage == LOG_WALK_REPLAY_ALL) {
- struct btrfs_root *root = wc.replay_dest;
+ struct btrfs_root *root = wc.root;
- ret = fixup_inode_link_counts(trans, wc.replay_dest, path);
- if (ret) {
+ wc.subvol_path = path;
+ ret = fixup_inode_link_counts(&wc);
+ wc.subvol_path = NULL;
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto next;
}
@@ -7392,17 +7728,18 @@ again:
* could only happen during mount.
*/
ret = btrfs_init_root_free_objectid(root);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto next;
}
}
next:
- if (wc.replay_dest) {
- wc.replay_dest->log_root = NULL;
- btrfs_put_root(wc.replay_dest);
+ if (wc.root) {
+ wc.root->log_root = NULL;
+ btrfs_put_root(wc.root);
}
- btrfs_put_root(log);
+ btrfs_put_root(wc.log);
+ wc.log = NULL;
if (ret)
goto error;
@@ -7414,7 +7751,7 @@ next:
/* step one is to pin it all, step two is to replay just inodes */
if (wc.pin) {
- wc.pin = 0;
+ wc.pin = false;
wc.process_func = replay_one_buffer;
wc.stage = LOG_WALK_REPLAY_INODES;
goto again;
@@ -7432,14 +7769,13 @@ next:
if (ret)
return ret;
- log_root_tree->log_root = NULL;
clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
- btrfs_put_root(log_root_tree);
return 0;
error:
if (wc.trans)
btrfs_end_transaction(wc.trans);
+ btrfs_put_root(wc.log);
clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
btrfs_free_path(path);
return ret;
diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c
index b7a96a005487..46bd8ca58670 100644
--- a/fs/btrfs/verity.c
+++ b/fs/btrfs/verity.c
@@ -487,12 +487,12 @@ static int rollback_verity(struct btrfs_inode *inode)
inode->ro_flags &= ~BTRFS_INODE_RO_VERITY;
btrfs_sync_inode_flags_to_i_flags(inode);
ret = btrfs_update_inode(trans, inode);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = del_orphan(trans, inode);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -676,11 +676,11 @@ int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size)
if (ret < 0)
return ret;
- if (item.reserved[0] != 0 || item.reserved[1] != 0)
+ if (unlikely(item.reserved[0] != 0 || item.reserved[1] != 0))
return -EUCLEAN;
true_size = btrfs_stack_verity_descriptor_size(&item);
- if (true_size > INT_MAX)
+ if (unlikely(true_size > INT_MAX))
return -EUCLEAN;
if (buf_size == 0)
@@ -802,6 +802,8 @@ static int btrfs_write_merkle_tree_block(struct inode *inode, const void *buf,
}
const struct fsverity_operations btrfs_verityops = {
+ .inode_info_offs = (int)offsetof(struct btrfs_inode, i_verity_info) -
+ (int)offsetof(struct btrfs_inode, vfs_inode),
.begin_enable_verity = btrfs_begin_enable_verity,
.end_enable_verity = btrfs_end_enable_verity,
.get_verity_descriptor = btrfs_get_verity_descriptor,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fa7a929a0461..2bec544d8ba3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1377,8 +1377,8 @@ struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
}
/*
- * Make sure the last byte of label is properly NUL termiated. We use
- * '%s' to print the label, if not properly NUL termiated we can access
+ * Make sure the last byte of label is properly NUL terminated. We use
+ * '%s' to print the label, if not properly NUL terminated we can access
* beyond the label.
*/
if (super->label[0] && super->label[BTRFS_LABEL_SIZE - 1])
@@ -1911,7 +1911,7 @@ static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
if (ret < 0)
goto error;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/* Corruption */
btrfs_err(fs_info, "corrupted chunk tree devid -1 matched");
ret = -EUCLEAN;
@@ -2243,7 +2243,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
}
ret = btrfs_rm_dev_item(trans, device);
- if (ret) {
+ if (unlikely(ret)) {
/* Any error in dev item removal is critical */
btrfs_crit(fs_info,
"failed to remove device item for devid %llu: %d",
@@ -2722,6 +2722,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
goto error;
}
+ if (bdev_nr_bytes(file_bdev(bdev_file)) <= BTRFS_DEVICE_RANGE_RESERVED) {
+ ret = -EINVAL;
+ goto error;
+ }
+
if (fs_devices->seeding) {
seeding_dev = true;
down_write(&sb->s_umount);
@@ -2838,21 +2843,21 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
mutex_lock(&fs_info->chunk_mutex);
ret = init_first_rw_device(trans);
mutex_unlock(&fs_info->chunk_mutex);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
}
}
ret = btrfs_add_dev_item(trans, device);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
}
if (seeding_dev) {
ret = btrfs_finish_sprout(trans);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
}
@@ -3044,7 +3049,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
goto out;
- else if (ret > 0) { /* Logic error or corruption */
+ else if (unlikely(ret > 0)) { /* Logic error or corruption */
btrfs_err(fs_info, "failed to lookup chunk %llu when freeing",
chunk_offset);
btrfs_abort_transaction(trans, -ENOENT);
@@ -3053,7 +3058,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
}
ret = btrfs_del_item(trans, root, path);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_err(fs_info, "failed to delete chunk %llu item", chunk_offset);
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3278,7 +3283,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
ret = btrfs_free_dev_extent(trans, device,
map->stripes[i].physical,
&dev_extent_len);
- if (ret) {
+ if (unlikely(ret)) {
mutex_unlock(&fs_devices->device_list_mutex);
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3348,7 +3353,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
struct btrfs_space_info *space_info;
space_info = btrfs_find_space_info(fs_info, sys_flags);
- if (!space_info) {
+ if (unlikely(!space_info)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3362,17 +3367,17 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
}
ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = remove_chunk_item(trans, map, chunk_offset);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
- } else if (ret) {
+ } else if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3381,7 +3386,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3397,7 +3402,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
btrfs_trans_release_chunk_metadata(trans);
ret = btrfs_remove_block_group(trans, map);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3522,7 +3527,7 @@ again:
mutex_unlock(&fs_info->reclaim_bgs_lock);
goto error;
}
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/*
* On the first search we would find chunk tree with
* offset -1, which is not possible. On subsequent
@@ -4264,7 +4269,7 @@ error:
* @flags: profile to validate
* @extended: if true @flags is treated as an extended profile
*/
-static int alloc_profile_is_valid(u64 flags, int extended)
+static int alloc_profile_is_valid(u64 flags, bool extended)
{
u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
BTRFS_BLOCK_GROUP_PROFILE_MASK);
@@ -4458,7 +4463,7 @@ out_overflow:
}
/*
- * Should be called with balance mutexe held
+ * Should be called with balance mutex held
*/
int btrfs_balance(struct btrfs_fs_info *fs_info,
struct btrfs_balance_control *bctl,
@@ -5036,7 +5041,7 @@ again:
/* Now btrfs_update_device() will change the on-disk size. */
ret = btrfs_update_device(trans, device);
btrfs_trans_release_chunk_metadata(trans);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
} else {
@@ -5696,7 +5701,7 @@ int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
item_size = btrfs_chunk_item_size(map->num_stripes);
chunk = kzalloc(item_size, GFP_NOFS);
- if (!chunk) {
+ if (unlikely(!chunk)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
@@ -7481,7 +7486,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
/*
* Lockdep complains about possible circular locking dependency between
* a disk's open_mutex (struct gendisk.open_mutex), the rw semaphores
- * used for freeze procection of a fs (struct super_block.s_writers),
+ * used for freeze protection of a fs (struct super_block.s_writers),
* which we take when starting a transaction, and extent buffers of the
* chunk tree if we call read_one_dev() while holding a lock on an
* extent buffer of the chunk tree. Since we are mounting the filesystem
@@ -7914,8 +7919,6 @@ int btrfs_bg_type_to_factor(u64 flags)
return btrfs_raid_array[index].ncopies;
}
-
-
static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
u64 chunk_offset, u64 devid,
u64 physical_offset, u64 physical_len)
@@ -7929,7 +7932,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
int i;
map = btrfs_find_chunk_map(fs_info, chunk_offset, 1);
- if (!map) {
+ if (unlikely(!map)) {
btrfs_err(fs_info,
"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
physical_offset, devid);
@@ -7938,7 +7941,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
}
stripe_len = btrfs_calc_stripe_length(map);
- if (physical_len != stripe_len) {
+ if (unlikely(physical_len != stripe_len)) {
btrfs_err(fs_info,
"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
physical_offset, devid, map->start, physical_len,
@@ -7958,8 +7961,8 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
devid, physical_offset, physical_len);
for (i = 0; i < map->num_stripes; i++) {
- if (map->stripes[i].dev->devid == devid &&
- map->stripes[i].physical == physical_offset) {
+ if (unlikely(map->stripes[i].dev->devid == devid &&
+ map->stripes[i].physical == physical_offset)) {
found = true;
if (map->verified_stripes >= map->num_stripes) {
btrfs_err(fs_info,
@@ -7972,7 +7975,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
break;
}
}
- if (!found) {
+ if (unlikely(!found)) {
btrfs_err(fs_info,
"dev extent physical offset %llu devid %llu has no corresponding chunk",
physical_offset, devid);
@@ -7981,13 +7984,13 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
/* Make sure no dev extent is beyond device boundary */
dev = btrfs_find_device(fs_info->fs_devices, &args);
- if (!dev) {
+ if (unlikely(!dev)) {
btrfs_err(fs_info, "failed to find devid %llu", devid);
ret = -EUCLEAN;
goto out;
}
- if (physical_offset + physical_len > dev->disk_total_bytes) {
+ if (unlikely(physical_offset + physical_len > dev->disk_total_bytes)) {
btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
devid, physical_offset, physical_len,
@@ -7999,8 +8002,8 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
if (dev->zone_info) {
u64 zone_size = dev->zone_info->zone_size;
- if (!IS_ALIGNED(physical_offset, zone_size) ||
- !IS_ALIGNED(physical_len, zone_size)) {
+ if (unlikely(!IS_ALIGNED(physical_offset, zone_size) ||
+ !IS_ALIGNED(physical_len, zone_size))) {
btrfs_err(fs_info,
"zoned: dev extent devid %llu physical offset %llu len %llu is not aligned to device zone",
devid, physical_offset, physical_len);
@@ -8024,7 +8027,7 @@ static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
struct btrfs_chunk_map *map;
map = rb_entry(node, struct btrfs_chunk_map, rb_node);
- if (map->num_stripes != map->verified_stripes) {
+ if (unlikely(map->num_stripes != map->verified_stripes)) {
btrfs_err(fs_info,
"chunk %llu has missing dev extent, have %d expect %d",
map->start, map->verified_stripes, map->num_stripes);
@@ -8084,7 +8087,7 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
if (ret < 0)
goto out;
/* No dev extents at all? Not good */
- if (ret > 0) {
+ if (unlikely(ret > 0)) {
ret = -EUCLEAN;
goto out;
}
@@ -8109,7 +8112,7 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
physical_len = btrfs_dev_extent_length(leaf, dext);
/* Check if this dev extent overlaps with the previous one */
- if (devid == prev_devid && physical_offset < prev_dev_ext_end) {
+ if (unlikely(devid == prev_devid && physical_offset < prev_dev_ext_end)) {
btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
devid, physical_offset, prev_dev_ext_end);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index a56e873a3029..2cbf8080eade 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -34,7 +34,7 @@ struct btrfs_zoned_device_info;
#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
/*
- * Arbitratry maximum size of one discard request to limit potentially long time
+ * Arbitrary maximum size of one discard request to limit potentially long time
* spent in blkdev_issue_discard().
*/
#define BTRFS_MAX_DISCARD_CHUNK_SIZE (SZ_1G)
@@ -495,7 +495,7 @@ struct btrfs_discard_stripe {
};
/*
- * Context for IO subsmission for device stripe.
+ * Context for IO submission for device stripe.
*
* - Track the unfinished mirrors for mirror based profiles
* Mirror based profiles are SINGLE/DUP/RAID1/RAID10.
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 5292cd341f70..6caba8be7c84 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -34,11 +34,9 @@ struct workspace {
int level;
};
-static struct workspace_manager wsm;
-
-struct list_head *zlib_get_workspace(unsigned int level)
+struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
{
- struct list_head *ws = btrfs_get_workspace(BTRFS_COMPRESS_ZLIB, level);
+ struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level);
struct workspace *workspace = list_entry(ws, struct workspace, list);
workspace->level = level;
@@ -55,8 +53,25 @@ void zlib_free_workspace(struct list_head *ws)
kfree(workspace);
}
-struct list_head *zlib_alloc_workspace(unsigned int level)
+/*
+ * For s390 hardware acceleration, the buffer size should be at least
+ * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance.
+ *
+ * But if bs > ps (block size larger than page size), the minimum folio size may
+ * already be at least ZLIB_DFLTCC_BUF_SIZE, so no special buffer is needed.
+ */
+static bool need_special_buffer(struct btrfs_fs_info *fs_info)
+{
+ if (!zlib_deflate_dfltcc_enabled())
+ return false;
+ if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE)
+ return false;
+ return true;
+}
+
+struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
{
+ const u32 blocksize = fs_info->sectorsize;
struct workspace *workspace;
int workspacesize;
@@ -69,19 +84,15 @@ struct list_head *zlib_alloc_workspace(unsigned int level)
workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
workspace->level = level;
workspace->buf = NULL;
- /*
- * In case of s390 zlib hardware support, allocate lager workspace
- * buffer. If allocator fails, fall back to a single page buffer.
- */
- if (zlib_deflate_dfltcc_enabled()) {
+ if (need_special_buffer(fs_info)) {
workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
__GFP_NOMEMALLOC | __GFP_NORETRY |
__GFP_NOWARN | GFP_NOIO);
workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
}
if (!workspace->buf) {
- workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
- workspace->buf_size = PAGE_SIZE;
+ workspace->buf = kmalloc(blocksize, GFP_KERNEL);
+ workspace->buf_size = blocksize;
}
if (!workspace->strm.workspace || !workspace->buf)
goto fail;
@@ -133,11 +144,15 @@ static int copy_data_into_buffer(struct address_space *mapping,
return 0;
}
-int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
+int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out)
{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct workspace *workspace = list_entry(ws, struct workspace, list);
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
+ const u32 min_folio_size = btrfs_min_folio_size(fs_info);
int ret;
char *data_in = NULL;
char *cfolio_out;
@@ -146,7 +161,8 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
struct folio *out_folio = NULL;
unsigned long len = *total_out;
unsigned long nr_dest_folios = *out_folios;
- const unsigned long max_out = nr_dest_folios * PAGE_SIZE;
+ const unsigned long max_out = nr_dest_folios << min_folio_shift;
+ const u32 blocksize = fs_info->sectorsize;
const u64 orig_end = start + len;
*out_folios = 0;
@@ -155,9 +171,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
ret = zlib_deflateInit(&workspace->strm, workspace->level);
if (unlikely(ret != Z_OK)) {
- struct btrfs_inode *inode = BTRFS_I(mapping->host);
-
- btrfs_err(inode->root->fs_info,
+ btrfs_err(fs_info,
"zlib compression init failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
ret = -EIO;
@@ -167,7 +181,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
workspace->strm.total_in = 0;
workspace->strm.total_out = 0;
- out_folio = btrfs_alloc_compr_folio();
+ out_folio = btrfs_alloc_compr_folio(fs_info);
if (out_folio == NULL) {
ret = -ENOMEM;
goto out;
@@ -179,7 +193,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
workspace->strm.next_in = workspace->buf;
workspace->strm.avail_in = 0;
workspace->strm.next_out = cfolio_out;
- workspace->strm.avail_out = PAGE_SIZE;
+ workspace->strm.avail_out = min_folio_size;
while (workspace->strm.total_in < len) {
/*
@@ -191,10 +205,11 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
unsigned int copy_length = min(bytes_left, workspace->buf_size);
/*
- * This can only happen when hardware zlib compression is
- * enabled.
+ * For s390 hardware accelerated zlib, when our folio is smaller
+ * than the copy_length, we need to fill the buffer so that
+ * we can take full advantage of hardware acceleration.
*/
- if (copy_length > PAGE_SIZE) {
+ if (need_special_buffer(fs_info)) {
ret = copy_data_into_buffer(mapping, workspace,
start, copy_length);
if (ret < 0)
@@ -225,9 +240,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
if (unlikely(ret != Z_OK)) {
- struct btrfs_inode *inode = BTRFS_I(mapping->host);
-
- btrfs_warn(inode->root->fs_info,
+ btrfs_warn(fs_info,
"zlib compression failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
start);
@@ -237,7 +250,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
}
/* we're making it bigger, give up */
- if (workspace->strm.total_in > 8192 &&
+ if (workspace->strm.total_in > blocksize * 2 &&
workspace->strm.total_in <
workspace->strm.total_out) {
ret = -E2BIG;
@@ -252,7 +265,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
ret = -E2BIG;
goto out;
}
- out_folio = btrfs_alloc_compr_folio();
+ out_folio = btrfs_alloc_compr_folio(fs_info);
if (out_folio == NULL) {
ret = -ENOMEM;
goto out;
@@ -260,7 +273,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
cfolio_out = folio_address(out_folio);
folios[nr_folios] = out_folio;
nr_folios++;
- workspace->strm.avail_out = PAGE_SIZE;
+ workspace->strm.avail_out = min_folio_size;
workspace->strm.next_out = cfolio_out;
}
/* we're all done */
@@ -278,7 +291,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
ret = zlib_deflate(&workspace->strm, Z_FINISH);
if (ret == Z_STREAM_END)
break;
- if (ret != Z_OK && ret != Z_BUF_ERROR) {
+ if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) {
zlib_deflateEnd(&workspace->strm);
ret = -EIO;
goto out;
@@ -288,7 +301,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
ret = -E2BIG;
goto out;
}
- out_folio = btrfs_alloc_compr_folio();
+ out_folio = btrfs_alloc_compr_folio(fs_info);
if (out_folio == NULL) {
ret = -ENOMEM;
goto out;
@@ -296,7 +309,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
cfolio_out = folio_address(out_folio);
folios[nr_folios] = out_folio;
nr_folios++;
- workspace->strm.avail_out = PAGE_SIZE;
+ workspace->strm.avail_out = min_folio_size;
workspace->strm.next_out = cfolio_out;
}
}
@@ -322,20 +335,22 @@ out:
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
+ struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
struct workspace *workspace = list_entry(ws, struct workspace, list);
+ const u32 min_folio_size = btrfs_min_folio_size(fs_info);
int ret = 0, ret2;
int wbits = MAX_WBITS;
char *data_in;
size_t total_out = 0;
unsigned long folio_in_index = 0;
size_t srclen = cb->compressed_len;
- unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
+ unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size);
unsigned long buf_start;
struct folio **folios_in = cb->compressed_folios;
data_in = kmap_local_folio(folios_in[folio_in_index], 0);
workspace->strm.next_in = data_in;
- workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);
+ workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size);
workspace->strm.total_in = 0;
workspace->strm.total_out = 0;
@@ -396,7 +411,7 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
data_in = kmap_local_folio(folios_in[folio_in_index], 0);
workspace->strm.next_in = data_in;
tmp = srclen - workspace->strm.total_in;
- workspace->strm.avail_in = min(tmp, PAGE_SIZE);
+ workspace->strm.avail_in = min(tmp, min_folio_size);
}
}
if (unlikely(ret != Z_STREAM_END)) {
@@ -484,8 +499,7 @@ out:
return ret;
}
-const struct btrfs_compress_op btrfs_zlib_compress = {
- .workspace_manager = &wsm,
+const struct btrfs_compress_levels btrfs_zlib_compress = {
.min_level = 1,
.max_level = 9,
.default_level = BTRFS_ZLIB_DEFAULT_LEVEL,
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index ea662036f441..e00036672f33 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -274,7 +274,7 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
return ret;
}
*nr_zones = ret;
- if (!ret)
+ if (unlikely(!ret))
return -EIO;
/* Populate cache */
@@ -315,7 +315,7 @@ static int calculate_emulated_zone_size(struct btrfs_fs_info *fs_info)
if (ret < 0)
return ret;
/* No dev extents at all? Not good */
- if (ret > 0)
+ if (unlikely(ret > 0))
return -EUCLEAN;
}
@@ -503,7 +503,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
}
- if (nreported != zone_info->nr_zones) {
+ if (unlikely(nreported != zone_info->nr_zones)) {
btrfs_err(device->fs_info,
"inconsistent number of zones on %s (%u/%u)",
rcu_dereference(device->name), nreported,
@@ -513,7 +513,12 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
}
if (max_active_zones) {
- if (nactive > max_active_zones) {
+ if (unlikely(nactive > max_active_zones)) {
+ if (bdev_max_active_zones(bdev) == 0) {
+ max_active_zones = 0;
+ zone_info->max_active_zones = 0;
+ goto validate;
+ }
btrfs_err(device->fs_info,
"zoned: %u active zones on %s exceeds max_active_zones %u",
nactive, rcu_dereference(device->name),
@@ -526,6 +531,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags);
}
+validate:
/* Validate superblock log */
nr_zones = BTRFS_NR_SB_LOG_ZONES;
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -544,7 +550,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
if (ret)
goto out;
- if (nr_zones != BTRFS_NR_SB_LOG_ZONES) {
+ if (unlikely(nr_zones != BTRFS_NR_SB_LOG_ZONES)) {
btrfs_err(device->fs_info,
"zoned: failed to read super block log zone info at devid %llu zone %u",
device->devid, sb_zone);
@@ -562,7 +568,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
ret = sb_write_pointer(device->bdev,
&zone_info->sb_zones[sb_pos], &sb_wp);
- if (ret != -ENOENT && ret) {
+ if (unlikely(ret != -ENOENT && ret)) {
btrfs_err(device->fs_info,
"zoned: super block log zone corrupted devid %llu zone %u",
device->devid, sb_zone);
@@ -895,7 +901,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
zones);
if (ret < 0)
return ret;
- if (ret != BTRFS_NR_SB_LOG_ZONES)
+ if (unlikely(ret != BTRFS_NR_SB_LOG_ZONES))
return -EIO;
return sb_log_location(bdev, zones, rw, bytenr_ret);
@@ -1247,7 +1253,7 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
root = btrfs_extent_root(fs_info, key.objectid);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* We should not find the exact match */
- if (!ret)
+ if (unlikely(!ret))
ret = -EUCLEAN;
if (ret < 0)
return ret;
@@ -1268,8 +1274,8 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
else
length = fs_info->nodesize;
- if (!(found_key.objectid >= cache->start &&
- found_key.objectid + length <= cache->start + cache->length)) {
+ if (unlikely(!(found_key.objectid >= cache->start &&
+ found_key.objectid + length <= cache->start + cache->length))) {
return -EUCLEAN;
}
*offset_ret = found_key.objectid + length - cache->start;
@@ -1351,7 +1357,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
return 0;
}
- if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
+ if (unlikely(zone.type == BLK_ZONE_TYPE_CONVENTIONAL)) {
btrfs_err(fs_info,
"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
zone.start << SECTOR_SHIFT, rcu_dereference(device->name),
@@ -1393,7 +1399,7 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
struct zone_info *info,
unsigned long *active)
{
- if (info->alloc_offset == WP_MISSING_DEV) {
+ if (unlikely(info->alloc_offset == WP_MISSING_DEV)) {
btrfs_err(bg->fs_info,
"zoned: cannot recover write pointer for zone %llu",
info->physical);
@@ -1422,13 +1428,13 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
- if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+ if (unlikely(zone_info[0].alloc_offset == WP_MISSING_DEV)) {
btrfs_err(bg->fs_info,
"zoned: cannot recover write pointer for zone %llu",
zone_info[0].physical);
return -EIO;
}
- if (zone_info[1].alloc_offset == WP_MISSING_DEV) {
+ if (unlikely(zone_info[1].alloc_offset == WP_MISSING_DEV)) {
btrfs_err(bg->fs_info,
"zoned: cannot recover write pointer for zone %llu",
zone_info[1].physical);
@@ -1441,14 +1447,14 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
if (zone_info[1].alloc_offset == WP_CONVENTIONAL)
zone_info[1].alloc_offset = last_alloc;
- if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
+ if (unlikely(zone_info[0].alloc_offset != zone_info[1].alloc_offset)) {
btrfs_err(bg->fs_info,
"zoned: write pointer offset mismatch of zones in DUP profile");
return -EIO;
}
if (test_bit(0, active) != test_bit(1, active)) {
- if (!btrfs_zone_activate(bg))
+ if (unlikely(!btrfs_zone_activate(bg)))
return -EIO;
} else if (test_bit(0, active)) {
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
@@ -1483,16 +1489,16 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
zone_info[i].alloc_offset = last_alloc;
- if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) &&
- !btrfs_test_opt(fs_info, DEGRADED)) {
+ if (unlikely((zone_info[0].alloc_offset != zone_info[i].alloc_offset) &&
+ !btrfs_test_opt(fs_info, DEGRADED))) {
btrfs_err(fs_info,
"zoned: write pointer offset mismatch of zones in %s profile",
btrfs_bg_type_to_raid_name(map->type));
return -EIO;
}
if (test_bit(0, active) != test_bit(i, active)) {
- if (!btrfs_test_opt(fs_info, DEGRADED) &&
- !btrfs_zone_activate(bg)) {
+ if (unlikely(!btrfs_test_opt(fs_info, DEGRADED) &&
+ !btrfs_zone_activate(bg))) {
return -EIO;
}
} else {
@@ -1548,7 +1554,7 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
}
if (test_bit(0, active) != test_bit(i, active)) {
- if (!btrfs_zone_activate(bg))
+ if (unlikely(!btrfs_zone_activate(bg)))
return -EIO;
} else {
if (test_bit(0, active))
@@ -1580,7 +1586,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
continue;
if (test_bit(0, active) != test_bit(i, active)) {
- if (!btrfs_zone_activate(bg))
+ if (unlikely(!btrfs_zone_activate(bg)))
return -EIO;
} else {
if (test_bit(0, active))
@@ -1637,7 +1643,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
return 0;
/* Sanity check */
- if (!IS_ALIGNED(length, fs_info->zone_size)) {
+ if (unlikely(!IS_ALIGNED(length, fs_info->zone_size))) {
btrfs_err(fs_info,
"zoned: block group %llu len %llu unaligned to zone size %llu",
logical, length, fs_info->zone_size);
@@ -1750,7 +1756,7 @@ out:
return -EINVAL;
}
- if (cache->alloc_offset > cache->zone_capacity) {
+ if (unlikely(cache->alloc_offset > cache->zone_capacity)) {
btrfs_err(fs_info,
"zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu",
cache->alloc_offset, cache->zone_capacity,
@@ -2081,7 +2087,7 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&mapped_length, &bioc, NULL, NULL);
- if (ret || !bioc || mapped_length < PAGE_SIZE) {
+ if (unlikely(ret || !bioc || mapped_length < PAGE_SIZE)) {
ret = -EIO;
goto out_put_bioc;
}
@@ -2139,7 +2145,7 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
if (physical_pos == wp)
return 0;
- if (physical_pos > wp)
+ if (unlikely(physical_pos > wp))
return -EUCLEAN;
length = wp - physical_pos;
@@ -2458,16 +2464,17 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
return ret;
}
-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
+int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
{
struct btrfs_block_group *block_group;
u64 min_alloc_bytes;
if (!btrfs_is_zoned(fs_info))
- return;
+ return 0;
block_group = btrfs_lookup_block_group(fs_info, logical);
- ASSERT(block_group);
+ if (WARN_ON_ONCE(!block_group))
+ return -ENOENT;
/* No MIXED_BG on zoned btrfs. */
if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
@@ -2484,16 +2491,21 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
out:
btrfs_put_block_group(block_group);
+ return 0;
}
static void btrfs_zone_finish_endio_workfn(struct work_struct *work)
{
+ int ret;
struct btrfs_block_group *bg =
container_of(work, struct btrfs_block_group, zone_finish_work);
wait_on_extent_buffer_writeback(bg->last_eb);
free_extent_buffer(bg->last_eb);
- btrfs_zone_finish_endio(bg->fs_info, bg->start, bg->length);
+ ret = do_zone_finish(bg, true);
+ if (ret)
+ btrfs_handle_fs_error(bg->fs_info, ret,
+ "Failed to finish block-group's zone");
btrfs_put_block_group(bg);
}
@@ -2515,7 +2527,7 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
refcount_inc(&eb->refs);
bg->last_eb = eb;
INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn);
- queue_work(system_unbound_wq, &bg->zone_finish_work);
+ queue_work(system_dfl_wq, &bg->zone_finish_work);
}
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
@@ -2582,9 +2594,9 @@ again:
spin_lock(&space_info->lock);
space_info->total_bytes -= bg->length;
space_info->disk_total -= bg->length * factor;
+ space_info->disk_total -= bg->zone_unusable;
/* There is no allocation ever happened. */
ASSERT(bg->used == 0);
- ASSERT(bg->zone_unusable == 0);
/* No super block in a block group on the zoned setup. */
ASSERT(bg->bytes_super == 0);
spin_unlock(&space_info->lock);
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 6e11533b8e14..17c5656580dd 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -83,7 +83,7 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
bool btrfs_zone_activate(struct btrfs_block_group *block_group);
int btrfs_zone_finish(struct btrfs_block_group *block_group);
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
u64 length);
void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
struct extent_buffer *eb);
@@ -234,8 +234,11 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
return true;
}
-static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
- u64 logical, u64 length) { }
+static inline int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 length)
+{
+ return 0;
+}
static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
struct extent_buffer *eb) { }
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index ff0292615e1f..c9cddcfa337b 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -77,7 +77,6 @@ struct workspace {
*/
struct zstd_workspace_manager {
- const struct btrfs_compress_op *ops;
spinlock_t lock;
struct list_head lru_list;
struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
@@ -86,8 +85,6 @@ struct zstd_workspace_manager {
struct timer_list timer;
};
-static struct zstd_workspace_manager wsm;
-
static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
static inline struct workspace *list_to_workspace(struct list_head *list)
@@ -112,19 +109,19 @@ static inline int clip_level(int level)
*/
static void zstd_reclaim_timer_fn(struct timer_list *timer)
{
+ struct zstd_workspace_manager *zwsm =
+ container_of(timer, struct zstd_workspace_manager, timer);
unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
struct list_head *pos, *next;
- ASSERT(timer == &wsm.timer);
-
- spin_lock(&wsm.lock);
+ spin_lock(&zwsm->lock);
- if (list_empty(&wsm.lru_list)) {
- spin_unlock(&wsm.lock);
+ if (list_empty(&zwsm->lru_list)) {
+ spin_unlock(&zwsm->lock);
return;
}
- list_for_each_prev_safe(pos, next, &wsm.lru_list) {
+ list_for_each_prev_safe(pos, next, &zwsm->lru_list) {
struct workspace *victim = container_of(pos, struct workspace,
lru_list);
int level;
@@ -141,15 +138,15 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
list_del(&victim->list);
zstd_free_workspace(&victim->list);
- if (list_empty(&wsm.idle_ws[level]))
- clear_bit(level, &wsm.active_map);
+ if (list_empty(&zwsm->idle_ws[level]))
+ clear_bit(level, &zwsm->active_map);
}
- if (!list_empty(&wsm.lru_list))
- mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
+ if (!list_empty(&zwsm->lru_list))
+ mod_timer(&zwsm->timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
- spin_unlock(&wsm.lock);
+ spin_unlock(&zwsm->lock);
}
/*
@@ -182,49 +179,56 @@ static void zstd_calc_ws_mem_sizes(void)
}
}
-void zstd_init_workspace_manager(void)
+int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info)
{
+ struct zstd_workspace_manager *zwsm;
struct list_head *ws;
- int i;
+ ASSERT(fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] == NULL);
+ zwsm = kzalloc(sizeof(*zwsm), GFP_KERNEL);
+ if (!zwsm)
+ return -ENOMEM;
zstd_calc_ws_mem_sizes();
+ spin_lock_init(&zwsm->lock);
+ init_waitqueue_head(&zwsm->wait);
+ timer_setup(&zwsm->timer, zstd_reclaim_timer_fn, 0);
- wsm.ops = &btrfs_zstd_compress;
- spin_lock_init(&wsm.lock);
- init_waitqueue_head(&wsm.wait);
- timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0);
-
- INIT_LIST_HEAD(&wsm.lru_list);
- for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
- INIT_LIST_HEAD(&wsm.idle_ws[i]);
+ INIT_LIST_HEAD(&zwsm->lru_list);
+ for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
+ INIT_LIST_HEAD(&zwsm->idle_ws[i]);
+ fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = zwsm;
- ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
+ ws = zstd_alloc_workspace(fs_info, ZSTD_BTRFS_MAX_LEVEL);
if (IS_ERR(ws)) {
btrfs_warn(NULL, "cannot preallocate zstd compression workspace");
} else {
- set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
- list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
+ set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &zwsm->active_map);
+ list_add(ws, &zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
}
+ return 0;
}
-void zstd_cleanup_workspace_manager(void)
+void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info)
{
+ struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
struct workspace *workspace;
- int i;
- spin_lock_bh(&wsm.lock);
- for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
- while (!list_empty(&wsm.idle_ws[i])) {
- workspace = container_of(wsm.idle_ws[i].next,
+ if (!zwsm)
+ return;
+ fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = NULL;
+ spin_lock_bh(&zwsm->lock);
+ for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
+ while (!list_empty(&zwsm->idle_ws[i])) {
+ workspace = container_of(zwsm->idle_ws[i].next,
struct workspace, list);
list_del(&workspace->list);
list_del(&workspace->lru_list);
zstd_free_workspace(&workspace->list);
}
}
- spin_unlock_bh(&wsm.lock);
-
- timer_delete_sync(&wsm.timer);
+ spin_unlock_bh(&zwsm->lock);
+ timer_delete_sync(&zwsm->timer);
+ kfree(zwsm);
}
/*
@@ -239,29 +243,31 @@ void zstd_cleanup_workspace_manager(void)
* offer the opportunity to reclaim the workspace in favor of allocating an
* appropriately sized one in the future.
*/
-static struct list_head *zstd_find_workspace(int level)
+static struct list_head *zstd_find_workspace(struct btrfs_fs_info *fs_info, int level)
{
+ struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
struct list_head *ws;
struct workspace *workspace;
int i = clip_level(level);
- spin_lock_bh(&wsm.lock);
- for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
- if (!list_empty(&wsm.idle_ws[i])) {
- ws = wsm.idle_ws[i].next;
+ ASSERT(zwsm);
+ spin_lock_bh(&zwsm->lock);
+ for_each_set_bit_from(i, &zwsm->active_map, ZSTD_BTRFS_MAX_LEVEL) {
+ if (!list_empty(&zwsm->idle_ws[i])) {
+ ws = zwsm->idle_ws[i].next;
workspace = list_to_workspace(ws);
list_del_init(ws);
/* keep its place if it's a lower level using this */
workspace->req_level = level;
if (clip_level(level) == workspace->level)
list_del(&workspace->lru_list);
- if (list_empty(&wsm.idle_ws[i]))
- clear_bit(i, &wsm.active_map);
- spin_unlock_bh(&wsm.lock);
+ if (list_empty(&zwsm->idle_ws[i]))
+ clear_bit(i, &zwsm->active_map);
+ spin_unlock_bh(&zwsm->lock);
return ws;
}
}
- spin_unlock_bh(&wsm.lock);
+ spin_unlock_bh(&zwsm->lock);
return NULL;
}
@@ -276,30 +282,33 @@ static struct list_head *zstd_find_workspace(int level)
* attempt to allocate a new workspace. If we fail to allocate one due to
* memory pressure, go to sleep waiting for the max level workspace to free up.
*/
-struct list_head *zstd_get_workspace(int level)
+struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level)
{
+ struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
struct list_head *ws;
unsigned int nofs_flag;
+ ASSERT(zwsm);
+
/* level == 0 means we can use any workspace */
if (!level)
level = 1;
again:
- ws = zstd_find_workspace(level);
+ ws = zstd_find_workspace(fs_info, level);
if (ws)
return ws;
nofs_flag = memalloc_nofs_save();
- ws = zstd_alloc_workspace(level);
+ ws = zstd_alloc_workspace(fs_info, level);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(ws)) {
DEFINE_WAIT(wait);
- prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(&zwsm->wait, &wait, TASK_UNINTERRUPTIBLE);
schedule();
- finish_wait(&wsm.wait, &wait);
+ finish_wait(&zwsm->wait, &wait);
goto again;
}
@@ -318,34 +327,36 @@ again:
* isn't set, it is also set here. Only the max level workspace tries and wakes
* up waiting workspaces.
*/
-void zstd_put_workspace(struct list_head *ws)
+void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws)
{
+ struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
struct workspace *workspace = list_to_workspace(ws);
- spin_lock_bh(&wsm.lock);
+ ASSERT(zwsm);
+ spin_lock_bh(&zwsm->lock);
/* A node is only taken off the lru if we are the corresponding level */
if (clip_level(workspace->req_level) == workspace->level) {
/* Hide a max level workspace from reclaim */
- if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
+ if (list_empty(&zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
INIT_LIST_HEAD(&workspace->lru_list);
} else {
workspace->last_used = jiffies;
- list_add(&workspace->lru_list, &wsm.lru_list);
- if (!timer_pending(&wsm.timer))
- mod_timer(&wsm.timer,
+ list_add(&workspace->lru_list, &zwsm->lru_list);
+ if (!timer_pending(&zwsm->timer))
+ mod_timer(&zwsm->timer,
jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
}
}
- set_bit(workspace->level, &wsm.active_map);
- list_add(&workspace->list, &wsm.idle_ws[workspace->level]);
+ set_bit(workspace->level, &zwsm->active_map);
+ list_add(&workspace->list, &zwsm->idle_ws[workspace->level]);
workspace->req_level = 0;
- spin_unlock_bh(&wsm.lock);
+ spin_unlock_bh(&zwsm->lock);
if (workspace->level == clip_level(ZSTD_BTRFS_MAX_LEVEL))
- cond_wake_up(&wsm.wait);
+ cond_wake_up(&zwsm->wait);
}
void zstd_free_workspace(struct list_head *ws)
@@ -357,8 +368,9 @@ void zstd_free_workspace(struct list_head *ws)
kfree(workspace);
}
-struct list_head *zstd_alloc_workspace(int level)
+struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level)
{
+ const u32 blocksize = fs_info->sectorsize;
struct workspace *workspace;
workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
@@ -371,7 +383,7 @@ struct list_head *zstd_alloc_workspace(int level)
workspace->req_level = level;
workspace->last_used = jiffies;
workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
- workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ workspace->buf = kmalloc(blocksize, GFP_KERNEL);
if (!workspace->mem || !workspace->buf)
goto fail;
@@ -384,11 +396,13 @@ fail:
return ERR_PTR(-ENOMEM);
}
-int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
+int zstd_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out)
{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct workspace *workspace = list_entry(ws, struct workspace, list);
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
zstd_cstream *stream;
int ret = 0;
int nr_folios = 0;
@@ -399,7 +413,9 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
unsigned long len = *total_out;
const unsigned long nr_dest_folios = *out_folios;
const u64 orig_end = start + len;
- unsigned long max_out = nr_dest_folios * PAGE_SIZE;
+ const u32 blocksize = fs_info->sectorsize;
+ const u32 min_folio_size = btrfs_min_folio_size(fs_info);
+ unsigned long max_out = nr_dest_folios * min_folio_size;
unsigned int cur_len;
workspace->params = zstd_get_btrfs_parameters(workspace->req_level, len);
@@ -411,9 +427,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
stream = zstd_init_cstream(&workspace->params, len, workspace->mem,
workspace->size);
if (unlikely(!stream)) {
- struct btrfs_inode *inode = BTRFS_I(mapping->host);
-
- btrfs_err(inode->root->fs_info,
+ btrfs_err(fs_info,
"zstd compression init level %d failed, root %llu inode %llu offset %llu",
workspace->req_level, btrfs_root_id(inode->root),
btrfs_ino(inode), start);
@@ -431,7 +445,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
workspace->in_buf.size = cur_len;
/* Allocate and map in the output buffer */
- out_folio = btrfs_alloc_compr_folio();
+ out_folio = btrfs_alloc_compr_folio(fs_info);
if (out_folio == NULL) {
ret = -ENOMEM;
goto out;
@@ -439,7 +453,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
folios[nr_folios++] = out_folio;
workspace->out_buf.dst = folio_address(out_folio);
workspace->out_buf.pos = 0;
- workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+ workspace->out_buf.size = min_t(size_t, max_out, min_folio_size);
while (1) {
size_t ret2;
@@ -447,9 +461,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
ret2 = zstd_compress_stream(stream, &workspace->out_buf,
&workspace->in_buf);
if (unlikely(zstd_is_error(ret2))) {
- struct btrfs_inode *inode = BTRFS_I(mapping->host);
-
- btrfs_warn(inode->root->fs_info,
+ btrfs_warn(fs_info,
"zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
workspace->req_level, zstd_get_error_code(ret2),
btrfs_root_id(inode->root), btrfs_ino(inode),
@@ -459,7 +471,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
}
/* Check to see if we are making it bigger */
- if (tot_in + workspace->in_buf.pos > 8192 &&
+ if (tot_in + workspace->in_buf.pos > blocksize * 2 &&
tot_in + workspace->in_buf.pos <
tot_out + workspace->out_buf.pos) {
ret = -E2BIG;
@@ -475,13 +487,13 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
/* Check if we need more output space */
if (workspace->out_buf.pos == workspace->out_buf.size) {
- tot_out += PAGE_SIZE;
- max_out -= PAGE_SIZE;
+ tot_out += min_folio_size;
+ max_out -= min_folio_size;
if (nr_folios == nr_dest_folios) {
ret = -E2BIG;
goto out;
}
- out_folio = btrfs_alloc_compr_folio();
+ out_folio = btrfs_alloc_compr_folio(fs_info);
if (out_folio == NULL) {
ret = -ENOMEM;
goto out;
@@ -489,8 +501,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
folios[nr_folios++] = out_folio;
workspace->out_buf.dst = folio_address(out_folio);
workspace->out_buf.pos = 0;
- workspace->out_buf.size = min_t(size_t, max_out,
- PAGE_SIZE);
+ workspace->out_buf.size = min_t(size_t, max_out, min_folio_size);
}
/* We've reached the end of the input */
@@ -522,9 +533,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
ret2 = zstd_end_stream(stream, &workspace->out_buf);
if (unlikely(zstd_is_error(ret2))) {
- struct btrfs_inode *inode = BTRFS_I(mapping->host);
-
- btrfs_err(inode->root->fs_info,
+ btrfs_err(fs_info,
"zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
workspace->req_level, zstd_get_error_code(ret2),
btrfs_root_id(inode->root), btrfs_ino(inode),
@@ -542,13 +551,13 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
goto out;
}
- tot_out += PAGE_SIZE;
- max_out -= PAGE_SIZE;
+ tot_out += min_folio_size;
+ max_out -= min_folio_size;
if (nr_folios == nr_dest_folios) {
ret = -E2BIG;
goto out;
}
- out_folio = btrfs_alloc_compr_folio();
+ out_folio = btrfs_alloc_compr_folio(fs_info);
if (out_folio == NULL) {
ret = -ENOMEM;
goto out;
@@ -556,7 +565,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
folios[nr_folios++] = out_folio;
workspace->out_buf.dst = folio_address(out_folio);
workspace->out_buf.pos = 0;
- workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+ workspace->out_buf.size = min_t(size_t, max_out, min_folio_size);
}
if (tot_out >= tot_in) {
@@ -578,13 +587,16 @@ out:
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
+ struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
struct workspace *workspace = list_entry(ws, struct workspace, list);
struct folio **folios_in = cb->compressed_folios;
size_t srclen = cb->compressed_len;
zstd_dstream *stream;
int ret = 0;
+ const u32 blocksize = fs_info->sectorsize;
+ const unsigned int min_folio_size = btrfs_min_folio_size(fs_info);
unsigned long folio_in_index = 0;
- unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
+ unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size);
unsigned long buf_start;
unsigned long total_out = 0;
@@ -602,11 +614,11 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
workspace->in_buf.src = kmap_local_folio(folios_in[folio_in_index], 0);
workspace->in_buf.pos = 0;
- workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
+ workspace->in_buf.size = min_t(size_t, srclen, min_folio_size);
workspace->out_buf.dst = workspace->buf;
workspace->out_buf.pos = 0;
- workspace->out_buf.size = PAGE_SIZE;
+ workspace->out_buf.size = blocksize;
while (1) {
size_t ret2;
@@ -642,16 +654,16 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
if (workspace->in_buf.pos == workspace->in_buf.size) {
kunmap_local(workspace->in_buf.src);
folio_in_index++;
- if (folio_in_index >= total_folios_in) {
+ if (unlikely(folio_in_index >= total_folios_in)) {
workspace->in_buf.src = NULL;
ret = -EIO;
goto done;
}
- srclen -= PAGE_SIZE;
+ srclen -= min_folio_size;
workspace->in_buf.src =
kmap_local_folio(folios_in[folio_in_index], 0);
workspace->in_buf.pos = 0;
- workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
+ workspace->in_buf.size = min_t(size_t, srclen, min_folio_size);
}
}
ret = 0;
@@ -718,9 +730,7 @@ finish:
return ret;
}
-const struct btrfs_compress_op btrfs_zstd_compress = {
- /* ZSTD uses own workspace manager */
- .workspace_manager = NULL,
+const struct btrfs_compress_levels btrfs_zstd_compress = {
.min_level = ZSTD_BTRFS_MIN_LEVEL,
.max_level = ZSTD_BTRFS_MAX_LEVEL,
.default_level = ZSTD_BTRFS_DEFAULT_LEVEL,