summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/accessors.h1
-rw-r--r--fs/btrfs/acl.c25
-rw-r--r--fs/btrfs/backref.c37
-rw-r--r--fs/btrfs/backref.h7
-rw-r--r--fs/btrfs/bio.c290
-rw-r--r--fs/btrfs/bio.h39
-rw-r--r--fs/btrfs/block-group.c89
-rw-r--r--fs/btrfs/block-group.h2
-rw-r--r--fs/btrfs/block-rsv.c14
-rw-r--r--fs/btrfs/btrfs_inode.h20
-rw-r--r--fs/btrfs/compression.c47
-rw-r--r--fs/btrfs/compression.h19
-rw-r--r--fs/btrfs/ctree.c238
-rw-r--r--fs/btrfs/ctree.h18
-rw-r--r--fs/btrfs/defrag.c19
-rw-r--r--fs/btrfs/delalloc-space.c4
-rw-r--r--fs/btrfs/delayed-inode.c26
-rw-r--r--fs/btrfs/delayed-ref.c45
-rw-r--r--fs/btrfs/dev-replace.c4
-rw-r--r--fs/btrfs/dir-item.c4
-rw-r--r--fs/btrfs/direct-io.c10
-rw-r--r--fs/btrfs/disk-io.c66
-rw-r--r--fs/btrfs/disk-io.h3
-rw-r--r--fs/btrfs/extent-tree.c172
-rw-r--r--fs/btrfs/extent-tree.h27
-rw-r--r--fs/btrfs/extent_io.c78
-rw-r--r--fs/btrfs/extent_io.h1
-rw-r--r--fs/btrfs/extent_map.h3
-rw-r--r--fs/btrfs/file-item.c85
-rw-r--r--fs/btrfs/file-item.h4
-rw-r--r--fs/btrfs/file.c38
-rw-r--r--fs/btrfs/free-space-cache.c24
-rw-r--r--fs/btrfs/free-space-tree.c55
-rw-r--r--fs/btrfs/fs.h36
-rw-r--r--fs/btrfs/inode-item.c5
-rw-r--r--fs/btrfs/inode.c276
-rw-r--r--fs/btrfs/ioctl.c219
-rw-r--r--fs/btrfs/messages.c1
-rw-r--r--fs/btrfs/messages.h3
-rw-r--r--fs/btrfs/misc.h12
-rw-r--r--fs/btrfs/ordered-data.c76
-rw-r--r--fs/btrfs/print-tree.c16
-rw-r--r--fs/btrfs/qgroup.c182
-rw-r--r--fs/btrfs/raid-stripe-tree.c18
-rw-r--r--fs/btrfs/raid56.c839
-rw-r--r--fs/btrfs/raid56.h103
-rw-r--r--fs/btrfs/reflink.c15
-rw-r--r--fs/btrfs/relocation.c85
-rw-r--r--fs/btrfs/root-tree.c4
-rw-r--r--fs/btrfs/scrub.c270
-rw-r--r--fs/btrfs/send.c152
-rw-r--r--fs/btrfs/space-info.c464
-rw-r--r--fs/btrfs/space-info.h43
-rw-r--r--fs/btrfs/subpage.c72
-rw-r--r--fs/btrfs/subpage.h1
-rw-r--r--fs/btrfs/super.c77
-rw-r--r--fs/btrfs/sysfs.c58
-rw-r--r--fs/btrfs/sysfs.h3
-rw-r--r--fs/btrfs/tests/extent-io-tests.c3
-rw-r--r--fs/btrfs/tests/extent-map-tests.c6
-rw-r--r--fs/btrfs/tests/qgroup-tests.c16
-rw-r--r--fs/btrfs/transaction.c48
-rw-r--r--fs/btrfs/transaction.h4
-rw-r--r--fs/btrfs/tree-checker.c23
-rw-r--r--fs/btrfs/tree-log.c183
-rw-r--r--fs/btrfs/tree-log.h8
-rw-r--r--fs/btrfs/uuid-tree.c120
-rw-r--r--fs/btrfs/verity.c32
-rw-r--r--fs/btrfs/volumes.c219
-rw-r--r--fs/btrfs/volumes.h10
-rw-r--r--fs/btrfs/xattr.c41
-rw-r--r--fs/btrfs/zoned.c64
-rw-r--r--fs/btrfs/zoned.h7
73 files changed, 2880 insertions, 2448 deletions
diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
index 99b3ced12805..78721412951c 100644
--- a/fs/btrfs/accessors.h
+++ b/fs/btrfs/accessors.h
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <uapi/linux/btrfs_tree.h>
+#include "fs.h"
#include "extent_io.h"
struct extent_buffer;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index e0ba00d64ea0..c336e2ab7f8a 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -14,12 +14,13 @@
#include "ctree.h"
#include "xattr.h"
#include "acl.h"
+#include "misc.h"
struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu)
{
int size;
const char *name;
- char *value = NULL;
+ char AUTO_KFREE(value);
struct posix_acl *acl;
if (rcu)
@@ -49,7 +50,6 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu)
acl = NULL;
else
acl = ERR_PTR(size);
- kfree(value);
return acl;
}
@@ -59,7 +59,7 @@ int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode,
{
int ret, size = 0;
const char *name;
- char *value = NULL;
+ char AUTO_KFREE(value);
switch (type) {
case ACL_TYPE_ACCESS:
@@ -85,28 +85,23 @@ int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode,
nofs_flag = memalloc_nofs_save();
value = kmalloc(size, GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
- if (!value) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!value)
+ return -ENOMEM;
ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
if (ret < 0)
- goto out;
+ return ret;
}
if (trans)
ret = btrfs_setxattr(trans, inode, name, value, size, 0);
else
ret = btrfs_setxattr_trans(inode, name, value, size, 0);
+ if (ret < 0)
+ return ret;
-out:
- kfree(value);
-
- if (!ret)
- set_cached_acl(inode, type, acl);
-
- return ret;
+ set_cached_acl(inode, type, acl);
+ return 0;
}
int btrfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 2ab550a1e715..78da47a3d00e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -666,10 +666,9 @@ static int resolve_indirect_ref(struct btrfs_backref_walk_ctx *ctx,
ret = btrfs_search_old_slot(root, &search_key, path, ctx->time_seq);
btrfs_debug(ctx->fs_info,
- "search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
- ref->root_id, level, ref->count, ret,
- ref->key_for_search.objectid, ref->key_for_search.type,
- ref->key_for_search.offset);
+"search slot in root %llu (level %d, ref count %d) returned %d for key " BTRFS_KEY_FMT,
+ ref->root_id, level, ref->count, ret,
+ BTRFS_KEY_FMT_VALUE(&ref->key_for_search));
if (ret < 0)
goto out;
@@ -1409,12 +1408,12 @@ static int find_parent_nodes(struct btrfs_backref_walk_ctx *ctx,
if (!path)
return -ENOMEM;
if (!ctx->trans) {
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
}
if (ctx->time_seq == BTRFS_SEQ_LAST)
- path->skip_locking = 1;
+ path->skip_locking = true;
again:
head = NULL;
@@ -1561,7 +1560,7 @@ again:
btrfs_release_path(path);
- ret = add_missing_keys(ctx->fs_info, &preftrees, path->skip_locking == 0);
+ ret = add_missing_keys(ctx->fs_info, &preftrees, !path->skip_locking);
if (ret)
goto out;
@@ -2786,7 +2785,7 @@ struct btrfs_data_container *init_data_container(u32 total_bytes)
* allocates space to return multiple file system paths for an inode.
* total_bytes to allocate are passed, note that space usable for actual path
* information will be total_bytes - sizeof(struct inode_fs_paths).
- * the returned pointer must be freed with free_ipath() in the end.
+ * the returned pointer must be freed with __free_inode_fs_paths() in the end.
*/
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
struct btrfs_path *path)
@@ -2811,14 +2810,6 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
return ifp;
}
-void free_ipath(struct inode_fs_paths *ipath)
-{
- if (!ipath)
- return;
- kvfree(ipath->fspath);
- kfree(ipath);
-}
-
struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_info)
{
struct btrfs_backref_iter *ret;
@@ -2834,8 +2825,8 @@ struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_inf
}
/* Current backref iterator only supports iteration in commit root */
- ret->path->search_commit_root = 1;
- ret->path->skip_locking = 1;
+ ret->path->search_commit_root = true;
+ ret->path->skip_locking = true;
ret->fs_info = fs_info;
return ret;
@@ -3308,8 +3299,8 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
level = cur->level + 1;
/* Search the tree to find parent blocks referring to the block */
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
path->lowest_level = level;
ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0);
path->lowest_level = 0;
@@ -3323,9 +3314,9 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
eb = path->nodes[level];
if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
btrfs_err(fs_info,
-"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
+"couldn't find block (%llu) (level %d) in tree (%llu) with key " BTRFS_KEY_FMT,
cur->bytenr, level - 1, btrfs_root_id(root),
- tree_key->objectid, tree_key->type, tree_key->offset);
+ BTRFS_KEY_FMT_VALUE(tree_key));
btrfs_put_root(root);
ret = -ENOENT;
goto out;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 25d51c246070..1d009b0f4c69 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -241,7 +241,12 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
struct btrfs_path *path);
-void free_ipath(struct inode_fs_paths *ipath);
+
+DEFINE_FREE(inode_fs_paths, struct inode_fs_paths *,
+ if (_T) {
+ kvfree(_T->fspath);
+ kfree(_T);
+ })
int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
u64 start_off, struct btrfs_path *path,
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 21df48e6c4fa..fa1d321a2fb8 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -41,13 +41,17 @@ static bool bbio_has_ordered_extent(const struct btrfs_bio *bbio)
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
*/
-void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, u64 file_offset,
btrfs_bio_end_io_t end_io, void *private)
{
+ /* @inode parameter is mandatory. */
+ ASSERT(inode);
+
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
- bbio->fs_info = fs_info;
+ bbio->inode = inode;
bbio->end_io = end_io;
bbio->private = private;
+ bbio->file_offset = file_offset;
atomic_set(&bbio->pending_ios, 1);
WRITE_ONCE(bbio->status, BLK_STS_OK);
}
@@ -60,7 +64,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
* a mempool.
*/
struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
- struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode, u64 file_offset,
btrfs_bio_end_io_t end_io, void *private)
{
struct btrfs_bio *bbio;
@@ -68,7 +72,7 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
bbio = btrfs_bio(bio);
- btrfs_bio_init(bbio, fs_info, end_io, private);
+ btrfs_bio_init(bbio, inode, file_offset, end_io, private);
return bbio;
}
@@ -85,13 +89,13 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
return ERR_CAST(bio);
bbio = btrfs_bio(bio);
- btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
- bbio->inode = orig_bbio->inode;
- bbio->file_offset = orig_bbio->file_offset;
+ btrfs_bio_init(bbio, orig_bbio->inode, orig_bbio->file_offset, NULL, orig_bbio);
orig_bbio->file_offset += map_length;
if (bbio_has_ordered_extent(bbio)) {
refcount_inc(&orig_bbio->ordered->refs);
bbio->ordered = orig_bbio->ordered;
+ bbio->orig_logical = orig_bbio->orig_logical;
+ orig_bbio->orig_logical += map_length;
}
bbio->csum_search_commit_root = orig_bbio->csum_search_commit_root;
atomic_inc(&orig_bbio->pending_ios);
@@ -100,6 +104,12 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
+ /* Make sure we're already in task context. */
+ ASSERT(in_task());
+
+ if (bbio->async_csum)
+ wait_for_completion(&bbio->csum_done);
+
bbio->bio.bi_status = status;
if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
struct btrfs_bio *orig_bbio = bbio->private;
@@ -163,11 +173,30 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
struct btrfs_failed_bio *fbio = repair_bbio->private;
struct btrfs_inode *inode = repair_bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
+ /*
+ * We can not move forward the saved_iter, as it will be later
+ * utilized by repair_bbio again.
+ */
+ struct bvec_iter saved_iter = repair_bbio->saved_iter;
+ const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+ const u64 logical = repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT;
+ const u32 nr_steps = repair_bbio->saved_iter.bi_size / step;
int mirror = repair_bbio->mirror_num;
+ phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
+ phys_addr_t paddr;
+ unsigned int slot = 0;
+
+ /* Repair bbio should be exactly one block sized. */
+ ASSERT(repair_bbio->saved_iter.bi_size == fs_info->sectorsize);
+
+ btrfs_bio_for_each_block(paddr, &repair_bbio->bio, &saved_iter, step) {
+ ASSERT(slot < nr_steps);
+ paddrs[slot] = paddr;
+ slot++;
+ }
if (repair_bbio->bio.bi_status ||
- !btrfs_data_csum_ok(repair_bbio, dev, 0, bvec_phys(bv))) {
+ !btrfs_data_csum_ok(repair_bbio, dev, 0, paddrs)) {
bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
repair_bbio->bio.bi_iter = repair_bbio->saved_iter;
@@ -186,8 +215,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
mirror = prev_repair_mirror(fbio, mirror);
btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
repair_bbio->file_offset, fs_info->sectorsize,
- repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
- bvec_phys(bv), mirror);
+ logical, paddrs, step, mirror);
} while (mirror != fbio->bbio->mirror_num);
done:
@@ -204,21 +232,25 @@ done:
*/
static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
u32 bio_offset,
- phys_addr_t paddr,
+ phys_addr_t paddrs[],
struct btrfs_failed_bio *fbio)
{
struct btrfs_inode *inode = failed_bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct folio *folio = page_folio(phys_to_page(paddr));
const u32 sectorsize = fs_info->sectorsize;
- const u32 foff = offset_in_folio(folio, paddr);
- const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT);
+ const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+ const u32 nr_steps = sectorsize / step;
+ /*
+ * For bs > ps cases, the saved_iter can be partially moved forward.
+ * In that case we should round it down to the block boundary.
+ */
+ const u64 logical = round_down(failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
+ sectorsize);
struct btrfs_bio *repair_bbio;
struct bio *repair_bio;
int num_copies;
int mirror;
- ASSERT(foff + sectorsize <= folio_size(folio));
btrfs_debug(fs_info, "repair read error: read error at %llu",
failed_bbio->file_offset + bio_offset);
@@ -238,15 +270,22 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
atomic_inc(&fbio->repair_count);
- repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,
+ repair_bio = bio_alloc_bioset(NULL, nr_steps, REQ_OP_READ, GFP_NOFS,
&btrfs_repair_bioset);
- repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector;
- bio_add_folio_nofail(repair_bio, folio, sectorsize, foff);
+ repair_bio->bi_iter.bi_sector = logical >> SECTOR_SHIFT;
+ for (int i = 0; i < nr_steps; i++) {
+ int ret;
+
+ ASSERT(offset_in_page(paddrs[i]) + step <= PAGE_SIZE);
+
+ ret = bio_add_page(repair_bio, phys_to_page(paddrs[i]), step,
+ offset_in_page(paddrs[i]));
+ ASSERT(ret == step);
+ }
repair_bbio = btrfs_bio(repair_bio);
- btrfs_bio_init(repair_bbio, fs_info, NULL, fbio);
- repair_bbio->inode = failed_bbio->inode;
- repair_bbio->file_offset = failed_bbio->file_offset + bio_offset;
+ btrfs_bio_init(repair_bbio, failed_bbio->inode, failed_bbio->file_offset + bio_offset,
+ NULL, fbio);
mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
@@ -258,10 +297,13 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u32 sectorsize = fs_info->sectorsize;
+ const u32 sectorsize = fs_info->sectorsize;
+ const u32 step = min(sectorsize, PAGE_SIZE);
+ const u32 nr_steps = sectorsize / step;
struct bvec_iter *iter = &bbio->saved_iter;
blk_status_t status = bbio->bio.bi_status;
struct btrfs_failed_bio *fbio = NULL;
+ phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
phys_addr_t paddr;
u32 offset = 0;
@@ -280,13 +322,19 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
/* Clear the I/O error. A failed repair will reset it. */
bbio->bio.bi_status = BLK_STS_OK;
- btrfs_bio_for_each_block(paddr, &bbio->bio, iter, fs_info->sectorsize) {
- if (status || !btrfs_data_csum_ok(bbio, dev, offset, paddr))
- fbio = repair_one_sector(bbio, offset, paddr, fbio);
- offset += sectorsize;
+ btrfs_bio_for_each_block(paddr, &bbio->bio, iter, step) {
+ paddrs[(offset / step) % nr_steps] = paddr;
+ offset += step;
+
+ if (IS_ALIGNED(offset, sectorsize)) {
+ if (status ||
+ !btrfs_data_csum_ok(bbio, dev, offset - sectorsize, paddrs))
+ fbio = repair_one_sector(bbio, offset - sectorsize,
+ paddrs, fbio);
+ }
}
if (bbio->csum != bbio->csum_inline)
- kfree(bbio->csum);
+ kvfree(bbio->csum);
if (fbio)
btrfs_repair_done(fbio);
@@ -317,36 +365,35 @@ static struct workqueue_struct *btrfs_end_io_wq(const struct btrfs_fs_info *fs_i
return fs_info->endio_workers;
}
-static void btrfs_end_bio_work(struct work_struct *work)
+static void simple_end_io_work(struct work_struct *work)
{
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+ struct bio *bio = &bbio->bio;
- /* Metadata reads are checked and repaired by the submitter. */
- if (is_data_bbio(bbio))
- btrfs_check_read_bio(bbio, bbio->bio.bi_private);
- else
- btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+ if (bio_op(bio) == REQ_OP_READ) {
+ /* Metadata reads are checked and repaired by the submitter. */
+ if (is_data_bbio(bbio))
+ return btrfs_check_read_bio(bbio, bbio->bio.bi_private);
+ return btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+ }
+ if (bio_is_zone_append(bio) && !bio->bi_status)
+ btrfs_record_physical_zoned(bbio);
+ btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
static void btrfs_simple_end_io(struct bio *bio)
{
struct btrfs_bio *bbio = btrfs_bio(bio);
struct btrfs_device *dev = bio->bi_private;
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
btrfs_bio_counter_dec(fs_info);
if (bio->bi_status)
btrfs_log_dev_io_error(bio, dev);
- if (bio_op(bio) == REQ_OP_READ) {
- INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
- queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
- } else {
- if (bio_is_zone_append(bio) && !bio->bi_status)
- btrfs_record_physical_zoned(bbio);
- btrfs_bio_end_io(bbio, bbio->bio.bi_status);
- }
+ INIT_WORK(&bbio->end_io_work, simple_end_io_work);
+ queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
}
static void btrfs_raid56_end_io(struct bio *bio)
@@ -354,6 +401,9 @@ static void btrfs_raid56_end_io(struct bio *bio)
struct btrfs_io_context *bioc = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);
+ /* RAID56 endio is always handled in workqueue. */
+ ASSERT(in_task());
+
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
@@ -364,11 +414,12 @@ static void btrfs_raid56_end_io(struct bio *bio)
btrfs_put_bioc(bioc);
}
-static void btrfs_orig_write_end_io(struct bio *bio)
+static void orig_write_end_io_work(struct work_struct *work)
{
+ struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+ struct bio *bio = &bbio->bio;
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;
- struct btrfs_bio *bbio = btrfs_bio(bio);
btrfs_bio_counter_dec(bioc->fs_info);
@@ -393,8 +444,18 @@ static void btrfs_orig_write_end_io(struct bio *bio)
btrfs_put_bioc(bioc);
}
-static void btrfs_clone_write_end_io(struct bio *bio)
+static void btrfs_orig_write_end_io(struct bio *bio)
{
+ struct btrfs_bio *bbio = btrfs_bio(bio);
+
+ INIT_WORK(&bbio->end_io_work, orig_write_end_io_work);
+ queue_work(btrfs_end_io_wq(bbio->inode->root->fs_info, bio), &bbio->end_io_work);
+}
+
+static void clone_write_end_io_work(struct work_struct *work)
+{
+ struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+ struct bio *bio = &bbio->bio;
struct btrfs_io_stripe *stripe = bio->bi_private;
if (bio->bi_status) {
@@ -409,6 +470,14 @@ static void btrfs_clone_write_end_io(struct bio *bio)
bio_put(bio);
}
+static void btrfs_clone_write_end_io(struct bio *bio)
+{
+ struct btrfs_bio *bbio = btrfs_bio(bio);
+
+ INIT_WORK(&bbio->end_io_work, clone_write_end_io_work);
+ queue_work(btrfs_end_io_wq(bbio->inode->root->fs_info, bio), &bbio->end_io_work);
+}
+
static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
{
if (!dev || !dev->bdev ||
@@ -455,6 +524,7 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
{
struct bio *orig_bio = bioc->orig_bio, *bio;
+ struct btrfs_bio *orig_bbio = btrfs_bio(orig_bio);
ASSERT(bio_op(orig_bio) != REQ_OP_READ);
@@ -463,8 +533,11 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
bio = orig_bio;
bio->bi_end_io = btrfs_orig_write_end_io;
} else {
- bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
+ /* We need to use endio_work to run end_io in task context. */
+ bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &btrfs_bioset);
bio_inc_remaining(orig_bio);
+ btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode,
+ orig_bbio->file_offset, NULL, NULL);
bio->bi_end_io = btrfs_clone_write_end_io;
}
@@ -509,7 +582,11 @@ static int btrfs_bio_csum(struct btrfs_bio *bbio)
{
if (bbio->bio.bi_opf & REQ_META)
return btree_csum_one_bio(bbio);
- return btrfs_csum_one_bio(bbio);
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ return btrfs_csum_one_bio(bbio, true);
+#else
+ return btrfs_csum_one_bio(bbio, false);
+#endif
}
/*
@@ -581,20 +658,25 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
static bool should_async_write(struct btrfs_bio *bbio)
{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
bool auto_csum_mode = true;
#ifdef CONFIG_BTRFS_EXPERIMENTAL
- struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
- if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
- return false;
-
- auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
+ if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_ON)
+ return true;
+ /*
+ * Write bios will calculate checksum and submit bio at the same time.
+ * Unless explicitly required, don't offload serial csum calculation and bio
+ * submission into a workqueue.
+ */
+ return false;
#endif
/* Submit synchronously if the checksum implementation is fast. */
- if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
+ if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
return false;
/*
@@ -605,7 +687,7 @@ static bool should_async_write(struct btrfs_bio *bbio)
return false;
/* Zoned devices require I/O to be submitted in order. */
- if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
+ if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(fs_info))
return false;
return true;
@@ -620,7 +702,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
struct btrfs_io_context *bioc,
struct btrfs_io_stripe *smap, int mirror_num)
{
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct async_submit_bio *async;
async = kmalloc(sizeof(*async), GFP_NOFS);
@@ -639,11 +721,12 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
unsigned int nr_segs;
int sector_offset;
- map_length = min(map_length, bbio->fs_info->max_zone_append_size);
- sector_offset = bio_split_rw_at(&bbio->bio, &bbio->fs_info->limits,
+ map_length = min(map_length, fs_info->max_zone_append_size);
+ sector_offset = bio_split_rw_at(&bbio->bio, &fs_info->limits,
&nr_segs, map_length);
if (sector_offset) {
/*
@@ -651,7 +734,7 @@ static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
* sectorsize and thus cause unaligned I/Os. Fix that by
* always rounding down to the nearest boundary.
*/
- return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, bbio->fs_info->sectorsize);
+ return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, fs_info->sectorsize);
}
return map_length;
}
@@ -659,7 +742,7 @@ static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
{
struct btrfs_inode *inode = bbio->inode;
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio = &bbio->bio;
u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bio->bi_iter.bi_size;
@@ -670,7 +753,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
blk_status_t status;
int ret;
- if (!bbio->inode || btrfs_is_data_reloc_root(inode->root))
+ if (bbio->is_scrub || btrfs_is_data_reloc_root(inode->root))
smap.rst_search_commit_root = true;
else
smap.rst_search_commit_root = false;
@@ -684,6 +767,14 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
goto end_bbio;
}
+ /*
+ * For fscrypt writes we will get the encrypted bio after we've remapped
+ * our bio to the physical disk location, so we need to save the
+ * original bytenr so we know what we're checksumming.
+ */
+ if (bio_op(bio) == REQ_OP_WRITE && is_data_bbio(bbio))
+ bbio->orig_logical = logical;
+
map_length = min(map_length, length);
if (use_append)
map_length = btrfs_append_map_length(bbio, map_length);
@@ -734,7 +825,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
* Csum items for reloc roots have already been cloned at this
* point, so they are handled as part of the no-checksum case.
*/
- if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) &&
+ if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
!test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
!btrfs_is_data_reloc_root(inode->root)) {
if (should_async_write(bbio) &&
@@ -782,25 +873,27 @@ end_bbio:
static void assert_bbio_alignment(struct btrfs_bio *bbio)
{
#ifdef CONFIG_BTRFS_ASSERT
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct bio_vec bvec;
struct bvec_iter iter;
const u32 blocksize = fs_info->sectorsize;
+ const u32 alignment = min(blocksize, PAGE_SIZE);
+ const u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+ const u32 length = bbio->bio.bi_iter.bi_size;
- /* Metadata has no extra bs > ps alignment requirement. */
- if (!is_data_bbio(bbio))
- return;
+ /* The logical and length should still be aligned to blocksize. */
+ ASSERT(IS_ALIGNED(logical, blocksize) && IS_ALIGNED(length, blocksize) &&
+ length != 0, "root=%llu inode=%llu logical=%llu length=%u",
+ btrfs_root_id(bbio->inode->root),
+ btrfs_ino(bbio->inode), logical, length);
bio_for_each_bvec(bvec, &bbio->bio, iter)
- ASSERT(IS_ALIGNED(bvec.bv_offset, blocksize) &&
- IS_ALIGNED(bvec.bv_len, blocksize),
+ ASSERT(IS_ALIGNED(bvec.bv_offset, alignment) &&
+ IS_ALIGNED(bvec.bv_len, alignment),
"root=%llu inode=%llu logical=%llu length=%u index=%u bv_offset=%u bv_len=%u",
btrfs_root_id(bbio->inode->root),
- btrfs_ino(bbio->inode),
- bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT,
- bbio->bio.bi_iter.bi_size, iter.bi_idx,
- bvec.bv_offset,
- bvec.bv_len);
+ btrfs_ino(bbio->inode), logical, length, iter.bi_idx,
+ bvec.bv_offset, bvec.bv_len);
#endif
}
@@ -824,18 +917,36 @@ void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
*
* The I/O is issued synchronously to block the repair read completion from
* freeing the bio.
+ *
+ * @ino: Offending inode number
+ * @fileoff: File offset inside the inode
+ * @length: Length of the repair write
+ * @logical: Logical address of the range
+ * @paddrs: Physical address array of the content
+ * @step: Length of each paddrs entry
+ * @mirror_num: Mirror number to write to. Must not be zero
*/
-int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
- u64 length, u64 logical, phys_addr_t paddr, int mirror_num)
+int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
+ u32 length, u64 logical, const phys_addr_t paddrs[],
+ unsigned int step, int mirror_num)
{
+ const u32 nr_steps = DIV_ROUND_UP_POW2(length, step);
struct btrfs_io_stripe smap = { 0 };
- struct bio_vec bvec;
- struct bio bio;
+ struct bio *bio = NULL;
int ret = 0;
ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
BUG_ON(!mirror_num);
+ /* Basic alignment checks. */
+ ASSERT(IS_ALIGNED(logical, fs_info->sectorsize));
+ ASSERT(IS_ALIGNED(length, fs_info->sectorsize));
+ ASSERT(IS_ALIGNED(fileoff, fs_info->sectorsize));
+ /* Either it's a single data or metadata block. */
+ ASSERT(length <= BTRFS_MAX_BLOCKSIZE);
+ ASSERT(step <= length);
+ ASSERT(is_power_of_2(step));
+
if (btrfs_repair_one_zone(fs_info, logical))
return 0;
@@ -855,24 +966,27 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
goto out_counter_dec;
}
- bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
- bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
- __bio_add_page(&bio, phys_to_page(paddr), length, offset_in_page(paddr));
- ret = submit_bio_wait(&bio);
+ bio = bio_alloc(smap.dev->bdev, nr_steps, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
+ bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
+ for (int i = 0; i < nr_steps; i++) {
+ ret = bio_add_page(bio, phys_to_page(paddrs[i]), step, offset_in_page(paddrs[i]));
+ /* We should have allocated enough slots to contain all the different pages. */
+ ASSERT(ret == step);
+ }
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
if (ret) {
/* try to remap that extent elsewhere? */
btrfs_dev_stat_inc_and_print(smap.dev, BTRFS_DEV_STAT_WRITE_ERRS);
- goto out_bio_uninit;
+ goto out_counter_dec;
}
btrfs_info_rl(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
- ino, start, btrfs_dev_name(smap.dev),
+ ino, fileoff, btrfs_dev_name(smap.dev),
smap.physical >> SECTOR_SHIFT);
ret = 0;
-out_bio_uninit:
- bio_uninit(&bio);
out_counter_dec:
btrfs_bio_counter_dec(fs_info);
return ret;
@@ -885,16 +999,16 @@ out_counter_dec:
*/
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace)
{
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bbio->bio.bi_iter.bi_size;
struct btrfs_io_stripe smap = { 0 };
int ret;
- ASSERT(fs_info);
ASSERT(mirror_num > 0);
ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
- ASSERT(!bbio->inode);
+ ASSERT(!is_data_inode(bbio->inode));
+ ASSERT(bbio->is_scrub);
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index 00883aea55d7..1be74209f0b8 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -18,13 +18,6 @@ struct btrfs_inode;
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
-/*
- * Maximum number of sectors for a single bio to limit the size of the
- * checksum array. This matches the number of bio_vecs per bio and thus the
- * I/O size for buffered I/O.
- */
-#define BTRFS_MAX_BIO_SECTORS (256)
-
typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
/*
@@ -34,7 +27,10 @@ typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
struct btrfs_bio {
/*
* Inode and offset into it that this I/O operates on.
- * Only set for data I/O.
+ *
+ * If the inode is a data one, csum verification and read-repair
+ * will be done automatically.
+ * If the inode is a metadata one, everything is handled by the caller.
*/
struct btrfs_inode *inode;
u64 file_offset;
@@ -56,11 +52,16 @@ struct btrfs_bio {
* - pointer to the checksums for this bio
* - original physical address from the allocator
* (for zone append only)
+ * - original logical address, used for checksumming fscrypt bios
*/
struct {
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_sum *sums;
+ struct work_struct csum_work;
+ struct completion csum_done;
+ struct bvec_iter csum_saved_iter;
u64 orig_physical;
+ u64 orig_logical;
};
/* For metadata reads: parentness verification. */
@@ -76,14 +77,21 @@ struct btrfs_bio {
atomic_t pending_ios;
struct work_struct end_io_work;
- /* File system that this I/O operates on. */
- struct btrfs_fs_info *fs_info;
-
/* Save the first error status of split bio. */
blk_status_t status;
/* Use the commit root to look up csums (data read bio only). */
bool csum_search_commit_root;
+
+ /*
+ * Since scrub reuses the btree inode, we need this flag to distinguish
+ * scrub bios.
+ */
+ bool is_scrub;
+
+ /* Whether the csum generation for data write is async. */
+ bool async_csum;
+
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_bio but relies on bio being last.
@@ -99,10 +107,10 @@ static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
int __init btrfs_bioset_init(void);
void __cold btrfs_bioset_exit(void);
-void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, u64 file_offset,
btrfs_bio_end_io_t end_io, void *private);
struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
- struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode, u64 file_offset,
btrfs_bio_end_io_t end_io, void *private);
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
@@ -111,7 +119,8 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num);
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
-int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
- u64 length, u64 logical, phys_addr_t paddr, int mirror_num);
+int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
+ u32 length, u64 logical, const phys_addr_t paddrs[],
+ unsigned int step, int mirror_num);
#endif
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 5322ef2ae015..08b14449fabe 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -613,8 +613,8 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_
extent_root = btrfs_extent_root(fs_info, max_t(u64, block_group->start,
BTRFS_SUPER_INFO_OFFSET));
- path->skip_locking = 1;
- path->search_commit_root = 1;
+ path->skip_locking = true;
+ path->search_commit_root = true;
path->reada = READA_FORWARD;
search_offset = index * div_u64(block_group->length, max_index);
@@ -744,8 +744,8 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
* root to add free space. So we skip locking and search the commit
* root, since its read-only
*/
- path->skip_locking = 1;
- path->search_commit_root = 1;
+ path->skip_locking = true;
+ path->search_commit_root = true;
path->reada = READA_FORWARD;
key.objectid = last;
@@ -1065,7 +1065,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_chunk_map *map)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_block_group *block_group;
struct btrfs_free_cluster *cluster;
struct inode *inode;
@@ -1305,7 +1305,6 @@ out:
btrfs_put_block_group(block_group);
if (remove_rsv)
btrfs_dec_delayed_refs_rsv_bg_updates(fs_info);
- btrfs_free_path(path);
return ret;
}
@@ -1403,8 +1402,7 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, bool force)
* BTRFS_RESERVE_NO_FLUSH to give ourselves the most amount of
* leeway to allow us to mark this block group as read only.
*/
- if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes,
- BTRFS_RESERVE_NO_FLUSH))
+ if (btrfs_can_overcommit(sinfo, num_bytes, BTRFS_RESERVE_NO_FLUSH))
ret = 0;
}
@@ -1425,7 +1423,7 @@ out:
if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
btrfs_info(cache->fs_info,
"unable to make block group %llu ro", cache->start);
- btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, false);
+ btrfs_dump_space_info(cache->space_info, 0, false);
}
return ret;
}
@@ -1850,12 +1848,10 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
if (!btrfs_should_reclaim(fs_info))
return;
- sb_start_write(fs_info->sb);
+ guard(super_write)(fs_info->sb);
- if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
- sb_end_write(fs_info->sb);
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
return;
- }
/*
* Long running balances can keep us blocked here for eternity, so
@@ -1863,7 +1859,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
*/
if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
btrfs_exclop_finish(fs_info);
- sb_end_write(fs_info->sb);
return;
}
@@ -1947,7 +1942,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
/*
* Get out fast, in case we're read-only or unmounting the
* filesystem. It is OK to drop block groups from the list even
- * for the read-only case. As we did sb_start_write(),
+ * for the read-only case. As we took the super write lock,
* "mount -o remount,ro" won't happen and read-only filesystem
* means it is forced read-only due to a fatal error. So, it
* never gets back to read-write to let us reclaim again.
@@ -2030,7 +2025,6 @@ end:
list_splice_tail(&retry_list, &fs_info->reclaim_bgs);
spin_unlock(&fs_info->unused_bgs_lock);
btrfs_exclop_finish(fs_info);
- sb_end_write(fs_info->sb);
}
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
@@ -3072,7 +3066,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
* We have allocated a new chunk. We also need to activate that chunk to
* grant metadata tickets for zoned filesystem.
*/
- ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
+ ret = btrfs_zoned_activate_one_bg(space_info, true);
if (ret < 0)
goto out;
@@ -3803,7 +3797,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
* reservation and return -EAGAIN, otherwise this function always succeeds.
*/
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
- u64 ram_bytes, u64 num_bytes, int delalloc,
+ u64 ram_bytes, u64 num_bytes, bool delalloc,
bool force_wrong_size_class)
{
struct btrfs_space_info *space_info = cache->space_info;
@@ -3814,30 +3808,38 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
spin_lock(&cache->lock);
if (cache->ro) {
ret = -EAGAIN;
- goto out;
+ goto out_error;
}
if (btrfs_block_group_should_use_size_class(cache)) {
size_class = btrfs_calc_block_group_size_class(num_bytes);
ret = btrfs_use_block_group_size_class(cache, size_class, force_wrong_size_class);
if (ret)
- goto out;
+ goto out_error;
}
+
cache->reserved += num_bytes;
- space_info->bytes_reserved += num_bytes;
+ if (delalloc)
+ cache->delalloc_bytes += num_bytes;
+
trace_btrfs_space_reservation(cache->fs_info, "space_info",
space_info->flags, num_bytes, 1);
+ spin_unlock(&cache->lock);
+
+ space_info->bytes_reserved += num_bytes;
btrfs_space_info_update_bytes_may_use(space_info, -ram_bytes);
- if (delalloc)
- cache->delalloc_bytes += num_bytes;
/*
* Compression can use less space than we reserved, so wake tickets if
* that happens.
*/
if (num_bytes < ram_bytes)
- btrfs_try_granting_tickets(cache->fs_info, space_info);
-out:
+ btrfs_try_granting_tickets(space_info);
+ spin_unlock(&space_info->lock);
+
+ return 0;
+
+out_error:
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
return ret;
@@ -3859,22 +3861,25 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes,
bool is_delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
+ bool bg_ro;
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
- if (cache->ro)
+ bg_ro = cache->ro;
+ cache->reserved -= num_bytes;
+ if (is_delalloc)
+ cache->delalloc_bytes -= num_bytes;
+ spin_unlock(&cache->lock);
+
+ if (bg_ro)
space_info->bytes_readonly += num_bytes;
else if (btrfs_is_zoned(cache->fs_info))
space_info->bytes_zone_unusable += num_bytes;
- cache->reserved -= num_bytes;
+
space_info->bytes_reserved -= num_bytes;
space_info->max_extent_size = 0;
- if (is_delalloc)
- cache->delalloc_bytes -= num_bytes;
- spin_unlock(&cache->lock);
-
- btrfs_try_granting_tickets(cache->fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}
@@ -4192,11 +4197,11 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans,
should_alloc = should_alloc_chunk(fs_info, space_info, force);
if (space_info->full) {
/* No more free physical space */
+ spin_unlock(&space_info->lock);
if (should_alloc)
ret = -ENOSPC;
else
ret = 0;
- spin_unlock(&space_info->lock);
return ret;
} else if (!should_alloc) {
spin_unlock(&space_info->lock);
@@ -4208,16 +4213,16 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans,
* recheck if we should continue with our allocation
* attempt.
*/
+ spin_unlock(&space_info->lock);
wait_for_alloc = true;
force = CHUNK_ALLOC_NO_FORCE;
- spin_unlock(&space_info->lock);
mutex_lock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->chunk_mutex);
} else {
/* Proceed with allocation */
- space_info->chunk_alloc = 1;
- wait_for_alloc = false;
+ space_info->chunk_alloc = true;
spin_unlock(&space_info->lock);
+ wait_for_alloc = false;
}
cond_resched();
@@ -4264,7 +4269,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans,
spin_lock(&space_info->lock);
if (ret < 0) {
if (ret == -ENOSPC)
- space_info->full = 1;
+ space_info->full = true;
else
goto out;
} else {
@@ -4274,7 +4279,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans,
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
- space_info->chunk_alloc = 0;
+ space_info->chunk_alloc = false;
spin_unlock(&space_info->lock);
mutex_unlock(&fs_info->chunk_mutex);
@@ -4315,7 +4320,7 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
left, bytes, type);
- btrfs_dump_space_info(fs_info, info, 0, false);
+ btrfs_dump_space_info(info, 0, false);
}
if (left < bytes) {
@@ -4340,7 +4345,7 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
* We have a new chunk. We also need to activate it for
* zoned filesystem.
*/
- ret = btrfs_zoned_activate_one_bg(fs_info, info, true);
+ ret = btrfs_zoned_activate_one_bg(info, true);
if (ret < 0)
return;
@@ -4460,7 +4465,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info)
* indicates a real bug if this happens.
*/
if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_may_use > 0))
- btrfs_dump_space_info(info, space_info, 0, false);
+ btrfs_dump_space_info(space_info, 0, false);
/*
* If there was a failure to cleanup a log tree, very likely due to an
@@ -4471,7 +4476,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info)
if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
!BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
if (WARN_ON(space_info->bytes_reserved > 0))
- btrfs_dump_space_info(info, space_info, 0, false);
+ btrfs_dump_space_info(space_info, 0, false);
}
WARN_ON(space_info->reclaim_size > 0);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 9172104a5889..5f933455118c 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -345,7 +345,7 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, bool alloc);
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
- u64 ram_bytes, u64 num_bytes, int delalloc,
+ u64 ram_bytes, u64 num_bytes, bool delalloc,
bool force_wrong_size_class);
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes,
bool is_delalloc);
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 5ad6de738aee..96cf7a162987 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -218,8 +218,7 @@ int btrfs_block_rsv_add(struct btrfs_fs_info *fs_info,
if (num_bytes == 0)
return 0;
- ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info,
- num_bytes, flush);
+ ret = btrfs_reserve_metadata_bytes(block_rsv->space_info, num_bytes, flush);
if (!ret)
btrfs_block_rsv_add_bytes(block_rsv, num_bytes, true);
@@ -259,8 +258,7 @@ int btrfs_block_rsv_refill(struct btrfs_fs_info *fs_info,
if (!ret)
return 0;
- ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info,
- num_bytes, flush);
+ ret = btrfs_reserve_metadata_bytes(block_rsv->space_info, num_bytes, flush);
if (!ret) {
btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false);
return 0;
@@ -387,7 +385,7 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
num_bytes = block_rsv->reserved - block_rsv->size;
btrfs_space_info_update_bytes_may_use(sinfo, -num_bytes);
block_rsv->reserved = block_rsv->size;
- btrfs_try_granting_tickets(fs_info, sinfo);
+ btrfs_try_granting_tickets(sinfo);
}
block_rsv->full = (block_rsv->reserved == block_rsv->size);
@@ -530,8 +528,8 @@ again:
block_rsv->type, ret);
}
try_reserve:
- ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info,
- blocksize, BTRFS_RESERVE_NO_FLUSH);
+ ret = btrfs_reserve_metadata_bytes(block_rsv->space_info, blocksize,
+ BTRFS_RESERVE_NO_FLUSH);
if (!ret)
return block_rsv;
/*
@@ -552,7 +550,7 @@ try_reserve:
* one last time to force a reservation if there's enough actual space
* on disk to make the reservation.
*/
- ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, blocksize,
+ ret = btrfs_reserve_metadata_bytes(block_rsv->space_info, blocksize,
BTRFS_RESERVE_FLUSH_EMERGENCY);
if (!ret)
return block_rsv;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index af373d50a901..73602ee8de3f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -18,20 +18,20 @@
#include <linux/lockdep.h>
#include <uapi/linux/btrfs_tree.h>
#include <trace/events/btrfs.h>
+#include "ctree.h"
#include "block-rsv.h"
#include "extent_map.h"
-#include "extent_io.h"
#include "extent-io-tree.h"
-#include "ordered-data.h"
-#include "delayed-inode.h"
-struct extent_state;
struct posix_acl;
struct iov_iter;
struct writeback_control;
struct btrfs_root;
struct btrfs_fs_info;
struct btrfs_trans_handle;
+struct btrfs_bio;
+struct btrfs_file_extent;
+struct btrfs_delayed_node;
/*
* Since we search a directory based on f_pos (struct dir_context::pos) we have
@@ -543,16 +543,14 @@ static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode)
#endif
}
-/* Array of bytes with variable length, hexadecimal format 0x1234 */
-#define CSUM_FMT "0x%*phN"
-#define CSUM_FMT_VALUE(size, bytes) size, bytes
-
-void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr,
- u8 *dest);
+void btrfs_calculate_block_csum_folio(struct btrfs_fs_info *fs_info,
+ const phys_addr_t paddr, u8 *dest);
+void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info,
+ const phys_addr_t paddrs[], u8 *dest);
int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
const u8 * const csum_expected);
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
- u32 bio_offset, phys_addr_t paddr);
+ u32 bio_offset, const phys_addr_t paddrs[]);
noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
struct btrfs_file_extent *file_extent,
bool nowait);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bacad18357b3..7dda6cc68379 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -67,9 +67,7 @@ static struct compressed_bio *alloc_compressed_bio(struct btrfs_inode *inode,
bbio = btrfs_bio(bio_alloc_bioset(NULL, BTRFS_MAX_COMPRESSED_PAGES, op,
GFP_NOFS, &btrfs_compressed_bioset));
- btrfs_bio_init(bbio, inode->root->fs_info, end_io, NULL);
- bbio->inode = inode;
- bbio->file_offset = start;
+ btrfs_bio_init(bbio, inode, start, end_io, NULL);
return to_compressed_bio(bbio);
}
@@ -194,15 +192,13 @@ static unsigned long btrfs_compr_pool_count(struct shrinker *sh, struct shrink_c
static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_control *sc)
{
- struct list_head remove;
+ LIST_HEAD(remove);
struct list_head *tmp, *next;
int freed;
if (compr_pool.count == 0)
return SHRINK_STOP;
- INIT_LIST_HEAD(&remove);
-
/* For now, just simply drain the whole list. */
spin_lock(&compr_pool.lock);
list_splice_init(&compr_pool.list, &remove);
@@ -321,22 +317,6 @@ static noinline void end_compressed_writeback(const struct compressed_bio *cb)
/* the inode may be gone now */
}
-static void btrfs_finish_compressed_write_work(struct work_struct *work)
-{
- struct compressed_bio *cb =
- container_of(work, struct compressed_bio, write_end_work);
-
- btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len,
- cb->bbio.bio.bi_status == BLK_STS_OK);
-
- if (cb->writeback)
- end_compressed_writeback(cb);
- /* Note, our inode could be gone now */
-
- btrfs_free_compressed_folios(cb);
- bio_put(&cb->bbio.bio);
-}
-
/*
* Do the cleanup once all the compressed pages hit the disk. This will clear
* writeback on the file pages and free the compressed pages.
@@ -347,28 +327,33 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
static void end_bbio_compressed_write(struct btrfs_bio *bbio)
{
struct compressed_bio *cb = to_compressed_bio(bbio);
- struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
- queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
+ btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len,
+ cb->bbio.bio.bi_status == BLK_STS_OK);
+
+ if (cb->writeback)
+ end_compressed_writeback(cb);
+ /* Note, our inode could be gone now. */
+ btrfs_free_compressed_folios(cb);
+ bio_put(&cb->bbio.bio);
}
static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
{
- struct btrfs_fs_info *fs_info = cb->bbio.fs_info;
struct bio *bio = &cb->bbio.bio;
u32 offset = 0;
+ unsigned int findex = 0;
while (offset < cb->compressed_len) {
- struct folio *folio;
+ struct folio *folio = cb->compressed_folios[findex];
+ u32 len = min_t(u32, cb->compressed_len - offset, folio_size(folio));
int ret;
- u32 len = min_t(u32, cb->compressed_len - offset,
- btrfs_min_folio_size(fs_info));
- folio = cb->compressed_folios[offset >> (PAGE_SHIFT + fs_info->block_min_order)];
/* Maximum compressed extent is smaller than bio size limit. */
ret = bio_add_folio(bio, folio, len, 0);
ASSERT(ret);
offset += len;
+ findex++;
}
}
@@ -402,7 +387,6 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
cb->compressed_folios = compressed_folios;
cb->compressed_len = ordered->disk_num_bytes;
cb->writeback = writeback;
- INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
cb->nr_folios = nr_folios;
cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT;
cb->bbio.ordered = ordered;
@@ -1100,7 +1084,8 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
/*
* a less complex decompression routine. Our compressed data fits in a
* single page, and we want to read a single page out of it.
- * start_byte tells us the offset into the compressed data we're interested in
+ * dest_pgoff tells us the offset into the destination folio where we write the
+ * decompressed data.
*/
int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
unsigned long dest_pgoff, size_t srclen, size_t destlen)
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index eba188a9e3bb..e0228017e861 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -14,14 +14,12 @@
#include <linux/pagemap.h>
#include "bio.h"
#include "fs.h"
-#include "messages.h"
+#include "btrfs_inode.h"
struct address_space;
-struct page;
struct inode;
struct btrfs_inode;
struct btrfs_ordered_extent;
-struct btrfs_bio;
/*
* We want to make sure that amount of RAM required to uncompress an extent is
@@ -65,11 +63,8 @@ struct compressed_bio {
/* Whether this is a write for writeback. */
bool writeback;
- union {
- /* For reads, this is the bio we are copying the data into */
- struct btrfs_bio *orig_bbio;
- struct work_struct write_end_work;
- };
+ /* For reads, this is the bio we are copying the data into. */
+ struct btrfs_bio *orig_bbio;
/* Must be last. */
struct btrfs_bio bbio;
@@ -77,7 +72,7 @@ struct compressed_bio {
static inline struct btrfs_fs_info *cb_to_fs_info(const struct compressed_bio *cb)
{
- return cb->bbio.fs_info;
+ return cb->bbio.inode->root->fs_info;
}
/* @range_end must be exclusive. */
@@ -85,8 +80,8 @@ static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u6
{
/* @cur must be inside the folio. */
ASSERT(folio_pos(folio) <= cur);
- ASSERT(cur < folio_end(folio));
- return min(range_end, folio_end(folio)) - cur;
+ ASSERT(cur < folio_next_pos(folio));
+ return umin(range_end, folio_next_pos(folio)) - cur;
}
int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info);
@@ -100,7 +95,7 @@ int btrfs_compress_folios(unsigned int type, int level, struct btrfs_inode *inod
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
- unsigned long start_byte, size_t srclen, size_t destlen);
+ unsigned long dest_pgoff, size_t srclen, size_t destlen);
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
struct compressed_bio *cb, u32 decompressed);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 561658aca018..a48b4befbee7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -862,6 +862,75 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
}
/*
+ * Promote a child node to become the new tree root.
+ *
+ * @trans: Transaction handle
+ * @root: Tree root structure to update
+ * @path: Path holding nodes and locks
+ * @level: Level of the parent (old root)
+ * @parent: The parent (old root) with exactly one item
+ *
+ * This helper is called during rebalancing when the root node contains only
+ * a single item (nritems == 1). We can reduce the tree height by promoting
+ * that child to become the new root and freeing the old root node. The path
+ * locks and references are updated accordingly.
+ *
+ * Return: 0 on success, negative errno on failure. The transaction is aborted
+ * on critical errors.
+ */
+static int promote_child_to_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ int level, struct extent_buffer *parent)
+{
+ struct extent_buffer *child;
+ int ret;
+
+ ASSERT(btrfs_header_nritems(parent) == 1);
+
+ child = btrfs_read_node_slot(parent, 0);
+ if (IS_ERR(child))
+ return PTR_ERR(child);
+
+ btrfs_tree_lock(child);
+ ret = btrfs_cow_block(trans, root, child, parent, 0, &child, BTRFS_NESTING_COW);
+ if (ret) {
+ btrfs_tree_unlock(child);
+ free_extent_buffer(child);
+ return ret;
+ }
+
+ ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
+ if (unlikely(ret < 0)) {
+ btrfs_tree_unlock(child);
+ free_extent_buffer(child);
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+ rcu_assign_pointer(root->node, child);
+
+ add_root_to_dirty_list(root);
+ btrfs_tree_unlock(child);
+
+ path->locks[level] = 0;
+ path->nodes[level] = NULL;
+ btrfs_clear_buffer_dirty(trans, parent);
+ btrfs_tree_unlock(parent);
+ /* Once for the path. */
+ free_extent_buffer(parent);
+
+ root_sub_used_bytes(root);
+ ret = btrfs_free_tree_block(trans, btrfs_root_id(root), parent, 0, 1);
+ /* Once for the root ptr. */
+ free_extent_buffer_stale(parent);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
* node level balancing, used to make sure nodes are in proper order for
* item deletion. We balance from the top down, so we have to make sure
* that a deletion won't leave an node completely empty later on.
@@ -900,55 +969,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
* by promoting the node below to a root
*/
if (!parent) {
- struct extent_buffer *child;
-
if (btrfs_header_nritems(mid) != 1)
return 0;
- /* promote the child to a root */
- child = btrfs_read_node_slot(mid, 0);
- if (IS_ERR(child)) {
- ret = PTR_ERR(child);
- goto out;
- }
-
- btrfs_tree_lock(child);
- ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
- BTRFS_NESTING_COW);
- if (ret) {
- btrfs_tree_unlock(child);
- free_extent_buffer(child);
- goto out;
- }
-
- ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
- if (unlikely(ret < 0)) {
- btrfs_tree_unlock(child);
- free_extent_buffer(child);
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
- rcu_assign_pointer(root->node, child);
-
- add_root_to_dirty_list(root);
- btrfs_tree_unlock(child);
-
- path->locks[level] = 0;
- path->nodes[level] = NULL;
- btrfs_clear_buffer_dirty(trans, mid);
- btrfs_tree_unlock(mid);
- /* once for the path */
- free_extent_buffer(mid);
-
- root_sub_used_bytes(root);
- ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
- /* once for the root ptr */
- free_extent_buffer_stale(mid);
- if (unlikely(ret < 0)) {
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
- return 0;
+ return promote_child_to_root(trans, root, path, level, mid);
}
if (btrfs_header_nritems(mid) >
BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
@@ -1101,11 +1125,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* update the path */
if (left) {
if (btrfs_header_nritems(left) > orig_slot) {
- refcount_inc(&left->refs);
/* left was locked after cow */
path->nodes[level] = left;
path->slots[level + 1] -= 1;
path->slots[level] = orig_slot;
+ /* Left is now owned by path. */
+ left = NULL;
if (mid) {
btrfs_tree_unlock(mid);
free_extent_buffer(mid);
@@ -1125,8 +1150,7 @@ out:
free_extent_buffer(right);
}
if (left) {
- if (path->nodes[level] != left)
- btrfs_tree_unlock(left);
+ btrfs_tree_unlock(left);
free_extent_buffer(left);
}
return ret;
@@ -1435,8 +1459,8 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
}
if (i >= lowest_unlock && i > skip_level) {
- check_skip = false;
btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
+ check_skip = false;
path->locks[i] = 0;
if (write_lock_level &&
i > min_write_lock_level &&
@@ -1709,9 +1733,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
level = btrfs_header_level(b);
/*
* Ensure that all callers have set skip_locking when
- * p->search_commit_root = 1.
+ * p->search_commit_root is true.
*/
- ASSERT(p->skip_locking == 1);
+ ASSERT(p->skip_locking);
goto out;
}
@@ -2599,12 +2623,11 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
if (unlikely(btrfs_comp_keys(&disk_key, new_key) >= 0)) {
btrfs_print_leaf(eb);
btrfs_crit(fs_info,
- "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
+ "slot %u key " BTRFS_KEY_FMT " new key " BTRFS_KEY_FMT,
slot, btrfs_disk_key_objectid(&disk_key),
btrfs_disk_key_type(&disk_key),
btrfs_disk_key_offset(&disk_key),
- new_key->objectid, new_key->type,
- new_key->offset);
+ BTRFS_KEY_FMT_VALUE(new_key));
BUG();
}
}
@@ -2613,12 +2636,11 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
if (unlikely(btrfs_comp_keys(&disk_key, new_key) <= 0)) {
btrfs_print_leaf(eb);
btrfs_crit(fs_info,
- "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
+ "slot %u key " BTRFS_KEY_FMT " new key " BTRFS_KEY_FMT,
slot, btrfs_disk_key_objectid(&disk_key),
btrfs_disk_key_type(&disk_key),
btrfs_disk_key_offset(&disk_key),
- new_key->objectid, new_key->type,
- new_key->offset);
+ BTRFS_KEY_FMT_VALUE(new_key));
BUG();
}
}
@@ -2677,10 +2699,9 @@ static bool check_sibling_keys(const struct extent_buffer *left,
btrfs_crit(left->fs_info, "right extent buffer:");
btrfs_print_tree(right, false);
btrfs_crit(left->fs_info,
-"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
- left_last.objectid, left_last.type,
- left_last.offset, right_first.objectid,
- right_first.type, right_first.offset);
+"bad key order, sibling blocks, left last " BTRFS_KEY_FMT " right first " BTRFS_KEY_FMT,
+ BTRFS_KEY_FMT_VALUE(&left_last),
+ BTRFS_KEY_FMT_VALUE(&right_first));
return true;
}
return false;
@@ -3217,10 +3238,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
/* then fixup the leaf pointer in the path */
if (path->slots[0] >= left_nritems) {
path->slots[0] -= left_nritems;
- if (btrfs_header_nritems(path->nodes[0]) == 0)
- btrfs_clear_buffer_dirty(trans, path->nodes[0]);
- btrfs_tree_unlock(path->nodes[0]);
- free_extent_buffer(path->nodes[0]);
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
path->nodes[0] = right;
path->slots[1] += 1;
} else {
@@ -3398,9 +3417,13 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(left, old_left_nritems + push_items);
/* fixup right node */
- if (push_items > right_nritems)
- WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
- right_nritems);
+ if (unlikely(push_items > right_nritems)) {
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ btrfs_crit(fs_info, "push items (%d) > right leaf items (%u)",
+ push_items, right_nritems);
+ goto out;
+ }
if (push_items < right_nritems) {
push_space = btrfs_item_offset(right, push_items - 1) -
@@ -3433,8 +3456,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
path->slots[0] += old_left_nritems;
- btrfs_tree_unlock(path->nodes[0]);
- free_extent_buffer(path->nodes[0]);
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
path->nodes[0] = left;
path->slots[1] -= 1;
} else {
@@ -3861,10 +3884,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- path->keep_locks = 1;
- path->search_for_split = 1;
+ path->keep_locks = true;
+ path->search_for_split = true;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
- path->search_for_split = 0;
+ path->search_for_split = false;
if (ret > 0)
ret = -EAGAIN;
if (ret < 0)
@@ -3891,11 +3914,11 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
if (ret)
goto err;
- path->keep_locks = 0;
+ path->keep_locks = false;
btrfs_unlock_up_safe(path, 1);
return 0;
err:
- path->keep_locks = 0;
+ path->keep_locks = false;
return ret;
}
@@ -4109,7 +4132,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
nritems = btrfs_header_nritems(leaf);
data_end = leaf_data_end(leaf);
- if (btrfs_leaf_free_space(leaf) < data_size) {
+ if (unlikely(btrfs_leaf_free_space(leaf) < data_size)) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4139,7 +4162,6 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
memmove_leaf_data(leaf, data_end - data_size, data_end,
old_data - data_end);
- data_end = old_data;
old_size = btrfs_item_size(leaf, slot);
btrfs_set_item_size(leaf, slot, old_size + data_size);
btrfs_mark_buffer_dirty(trans, leaf);
@@ -4498,9 +4520,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
/* delete the leaf if we've emptied it */
if (nritems == 0) {
- if (leaf == root->node) {
- btrfs_set_header_level(leaf, 0);
- } else {
+ if (leaf != root->node) {
btrfs_clear_buffer_dirty(trans, leaf);
ret = btrfs_del_leaf(trans, root, path, leaf);
if (ret < 0)
@@ -4566,10 +4586,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (btrfs_header_nritems(leaf) == 0) {
path->slots[1] = slot;
ret = btrfs_del_leaf(trans, root, path, leaf);
+ free_extent_buffer(leaf);
if (ret < 0)
return ret;
- free_extent_buffer(leaf);
- ret = 0;
} else {
/* if we're still in the path, make sure
* we're dirty. Otherwise, one of the
@@ -4613,11 +4632,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
u32 nritems;
int level;
int ret = 1;
- int keep_locks = path->keep_locks;
+ const bool keep_locks = path->keep_locks;
ASSERT(!path->nowait);
ASSERT(path->lowest_level == 0);
- path->keep_locks = 1;
+ path->keep_locks = true;
again:
cur = btrfs_read_lock_root_node(root);
level = btrfs_header_level(cur);
@@ -4707,7 +4726,7 @@ out:
* 0 is returned if another key is found, < 0 if there are any errors
* and 1 is returned if there are no higher keys in the tree
*
- * path->keep_locks should be set to 1 on the search made before
+ * path->keep_locks should be set to true on the search made before
* calling this function.
*/
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
@@ -4806,13 +4825,13 @@ again:
next = NULL;
btrfs_release_path(path);
- path->keep_locks = 1;
+ path->keep_locks = true;
if (time_seq) {
ret = btrfs_search_old_slot(root, &key, path, time_seq);
} else {
if (path->need_commit_sem) {
- path->need_commit_sem = 0;
+ path->need_commit_sem = false;
need_commit_sem = true;
if (path->nowait) {
if (!down_read_trylock(&fs_info->commit_root_sem)) {
@@ -4825,41 +4844,30 @@ again:
}
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
}
- path->keep_locks = 0;
+ path->keep_locks = false;
if (ret < 0)
goto done;
nritems = btrfs_header_nritems(path->nodes[0]);
/*
- * by releasing the path above we dropped all our locks. A balance
- * could have added more items next to the key that used to be
- * at the very end of the block. So, check again here and
- * advance the path if there are now more items available.
- */
- if (nritems > 0 && path->slots[0] < nritems - 1) {
- if (ret == 0)
- path->slots[0]++;
- ret = 0;
- goto done;
- }
- /*
- * So the above check misses one case:
- * - after releasing the path above, someone has removed the item that
- * used to be at the very end of the block, and balance between leafs
- * gets another one with bigger key.offset to replace it.
+ * By releasing the path above we dropped all our locks. A balance
+ * could have happened and
*
- * This one should be returned as well, or we can get leaf corruption
- * later(esp. in __btrfs_drop_extents()).
+ * 1. added more items after the previous last item
+ * 2. deleted the previous last item
*
- * And a bit more explanation about this check,
- * with ret > 0, the key isn't found, the path points to the slot
- * where it should be inserted, so the path->slots[0] item must be the
- * bigger one.
+ * So, check again here and advance the path if there are now more
+ * items available.
*/
- if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
- ret = 0;
- goto done;
+ if (nritems > 0 && path->slots[0] <= nritems - 1) {
+ if (ret == 0 && path->slots[0] != nritems - 1) {
+ path->slots[0]++;
+ goto done;
+ } else if (ret > 0) {
+ ret = 0;
+ goto done;
+ }
}
while (level < BTRFS_MAX_LEVEL) {
@@ -4964,7 +4972,7 @@ done:
if (need_commit_sem) {
int ret2;
- path->need_commit_sem = 1;
+ path->need_commit_sem = true;
ret2 = finish_need_commit_sem_search(path);
up_read(&fs_info->commit_root_sem);
if (ret2)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fe70b593c7cd..692370fc07b2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -17,9 +17,7 @@
#include <linux/refcount.h>
#include <uapi/linux/btrfs_tree.h>
#include "locking.h"
-#include "fs.h"
#include "accessors.h"
-#include "extent-io-tree.h"
struct extent_buffer;
struct btrfs_block_rsv;
@@ -67,21 +65,21 @@ struct btrfs_path {
* set by btrfs_split_item, tells search_slot to keep all locks
* and to force calls to keep space in the nodes
*/
- unsigned int search_for_split:1;
+ bool search_for_split:1;
/* Keep some upper locks as we walk down. */
- unsigned int keep_locks:1;
- unsigned int skip_locking:1;
- unsigned int search_commit_root:1;
- unsigned int need_commit_sem:1;
- unsigned int skip_release_on_error:1;
+ bool keep_locks:1;
+ bool skip_locking:1;
+ bool search_commit_root:1;
+ bool need_commit_sem:1;
+ bool skip_release_on_error:1;
/*
* Indicate that new item (btrfs_search_slot) is extending already
* existing item and ins_len contains only the data size and not item
* header (ie. sizeof(struct btrfs_item) is not included).
*/
- unsigned int search_for_extension:1;
+ bool search_for_extension:1;
/* Stop search if any locks need to be taken (for read) */
- unsigned int nowait:1;
+ bool nowait:1;
};
#define BTRFS_PATH_AUTO_FREE(path_name) \
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 7b277934f66f..b81e224d4a27 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -15,6 +15,7 @@
#include "defrag.h"
#include "file-item.h"
#include "super.h"
+#include "compression.h"
static struct kmem_cache *btrfs_inode_defrag_cachep;
@@ -254,10 +255,9 @@ again:
range.extent_thresh = defrag->extent_thresh;
file_ra_state_init(ra, inode->vfs_inode.i_mapping);
- sb_start_write(fs_info->sb);
- ret = btrfs_defrag_file(inode, ra, &range, defrag->transid,
- BTRFS_DEFRAG_BATCH);
- sb_end_write(fs_info->sb);
+ scoped_guard(super_write, fs_info->sb)
+ ret = btrfs_defrag_file(inode, ra, &range,
+ defrag->transid, BTRFS_DEFRAG_BATCH);
iput(&inode->vfs_inode);
if (ret < 0)
@@ -471,7 +471,7 @@ static int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
memcpy(&key, &root->defrag_progress, sizeof(key));
}
- path->keep_locks = 1;
+ path->keep_locks = true;
ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION);
if (ret < 0)
@@ -514,7 +514,7 @@ static int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
/*
* Now that we reallocated the node we can find the next key. Note that
* btrfs_find_next_key() can release our path and do another search
- * without COWing, this is because even with path->keep_locks = 1,
+ * without COWing, this is because even with path->keep_locks == true,
* btrfs_search_slot() / ctree.c:unlock_up() does not keep a lock on a
* node when path->slots[node_level - 1] does not point to the last
* item or a slot beyond the last item (ctree.c:unlock_up()). Therefore
@@ -886,7 +886,7 @@ again:
}
lock_start = folio_pos(folio);
- lock_end = folio_end(folio) - 1;
+ lock_end = folio_next_pos(folio) - 1;
/* Wait for any existing ordered extent in the range */
while (1) {
struct btrfs_ordered_extent *ordered;
@@ -1178,7 +1178,8 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
if (!folio)
break;
- if (start >= folio_end(folio) || start + len <= folio_pos(folio))
+ if (start >= folio_next_pos(folio) ||
+ start + len <= folio_pos(folio))
continue;
btrfs_folio_clamp_clear_checked(fs_info, folio, start, len);
btrfs_folio_clamp_set_dirty(fs_info, folio, start, len);
@@ -1219,7 +1220,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
folios[i] = NULL;
goto free_folios;
}
- cur = folio_end(folios[i]);
+ cur = folio_next_pos(folios[i]);
}
for (int i = 0; i < nr_pages; i++) {
if (!folios[i])
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index 288e1776c02d..0970799d0aa4 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -358,8 +358,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
noflush);
if (ret)
return ret;
- ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info,
- meta_reserve, flush);
+ ret = btrfs_reserve_metadata_bytes(block_rsv->space_info, meta_reserve,
+ flush);
if (ret) {
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
return ret;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 3df7b9d7fbe8..ce6e9f8812e0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -668,7 +668,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_key first_key;
const u32 first_data_size = first_item->data_len;
int total_size;
- char *ins_data = NULL;
+ char AUTO_KFREE(ins_data);
int ret;
bool continuous_keys_only = false;
@@ -740,10 +740,8 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
ins_data = kmalloc_array(batch.nr,
sizeof(u32) + sizeof(struct btrfs_key), GFP_NOFS);
- if (!ins_data) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!ins_data)
+ return -ENOMEM;
ins_sizes = (u32 *)ins_data;
ins_keys = (struct btrfs_key *)(ins_data + batch.nr * sizeof(u32));
batch.keys = ins_keys;
@@ -759,7 +757,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_items(trans, root, path, &batch);
if (ret)
- goto out;
+ return ret;
list_for_each_entry(curr, &item_list, tree_list) {
char *data_ptr;
@@ -814,9 +812,8 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
list_del(&curr->tree_list);
btrfs_release_delayed_item(curr);
}
-out:
- kfree(ins_data);
- return ret;
+
+ return 0;
}
static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
@@ -2011,13 +2008,10 @@ int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
* It is very rare.
*/
mutex_lock(&delayed_node->mutex);
- if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
- goto release_node;
-
- set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
- delayed_node->count++;
- atomic_inc(&fs_info->delayed_root->items);
-release_node:
+ if (!test_and_set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags)) {
+ delayed_node->count++;
+ atomic_inc(&fs_info->delayed_root->items);
+ }
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 481802efaa14..e8bc37453336 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -228,7 +228,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
if (!num_bytes)
return 0;
- ret = btrfs_reserve_metadata_bytes(fs_info, space_info, num_bytes, flush);
+ ret = btrfs_reserve_metadata_bytes(space_info, num_bytes, flush);
if (ret)
return ret;
@@ -798,9 +798,13 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
}
/*
- * helper function to actually insert a head node into the rbtree.
- * this does all the dirty work in terms of maintaining the correct
- * overall modification count.
+ * Helper function to actually insert a head node into the xarray. This does all
+ * the dirty work in terms of maintaining the correct overall modification
+ * count.
+ *
+ * The caller is responsible for calling kfree() on @qrecord. More specifically,
+ * if this function reports that it did not insert it as noted in
+ * @qrecord_inserted_ret, then it's safe to call kfree() on it.
*
* Returns an error pointer in case of an error.
*/
@@ -814,7 +818,14 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_root *delayed_refs;
const unsigned long index = (head_ref->bytenr >> fs_info->sectorsize_bits);
- bool qrecord_inserted = false;
+
+ /*
+ * If 'qrecord_inserted_ret' is provided, then the first thing we need
+ * to do is to initialize it to false just in case we have an exit
+ * before trying to insert the record.
+ */
+ if (qrecord_inserted_ret)
+ *qrecord_inserted_ret = false;
delayed_refs = &trans->transaction->delayed_refs;
lockdep_assert_held(&delayed_refs->lock);
@@ -833,6 +844,12 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
/* Record qgroup extent info if provided */
if (qrecord) {
+ /*
+ * Setting 'qrecord' but not 'qrecord_inserted_ret' will likely
+ * result in a memory leak.
+ */
+ ASSERT(qrecord_inserted_ret != NULL);
+
int ret;
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord,
@@ -840,12 +857,10 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
if (ret) {
/* Clean up if insertion fails or item exists. */
xa_release(&delayed_refs->dirty_extents, index);
- /* Caller responsible for freeing qrecord on error. */
if (ret < 0)
return ERR_PTR(ret);
- kfree(qrecord);
- } else {
- qrecord_inserted = true;
+ } else if (qrecord_inserted_ret) {
+ *qrecord_inserted_ret = true;
}
}
@@ -888,8 +903,6 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
delayed_refs->num_heads++;
delayed_refs->num_heads_ready++;
}
- if (qrecord_inserted_ret)
- *qrecord_inserted_ret = qrecord_inserted;
return head_ref;
}
@@ -1049,6 +1062,14 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
xa_release(&delayed_refs->head_refs, index);
spin_unlock(&delayed_refs->lock);
ret = PTR_ERR(new_head_ref);
+
+ /*
+ * It's only safe to call kfree() on 'qrecord' if
+ * add_delayed_ref_head() has _not_ inserted it for
+ * tracing. Otherwise we need to handle this here.
+ */
+ if (!qrecord_reserved || qrecord_inserted)
+ goto free_head_ref;
goto free_record;
}
head_ref = new_head_ref;
@@ -1071,6 +1092,8 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
if (qrecord_inserted)
return btrfs_qgroup_trace_extent_post(trans, record, generic_ref->bytenr);
+
+ kfree(record);
return 0;
free_record:
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a4eaef60549e..b6c7da8e1bc8 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -489,8 +489,8 @@ static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info,
}
path->reada = READA_FORWARD;
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
key.objectid = src_dev->devid;
key.type = BTRFS_DEV_EXTENT_KEY;
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 69863e398e22..085a83ae9e62 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -9,6 +9,7 @@
#include "transaction.h"
#include "accessors.h"
#include "dir-item.h"
+#include "delayed-inode.h"
/*
* insert a name into a directory, doing overflow properly if there is a hash
@@ -111,7 +112,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
int ret = 0;
int ret2 = 0;
struct btrfs_root *root = dir->root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_dir_item *dir_item;
struct extent_buffer *leaf;
unsigned long name_ptr;
@@ -163,7 +164,6 @@ second_insert:
ret2 = btrfs_insert_delayed_dir_index(trans, name->name, name->len, dir,
&disk_key, type, index);
out_free:
- btrfs_free_path(path);
if (ret)
return ret;
if (ret2)
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index 802d4dbe5b38..07e19e88ba4b 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -10,6 +10,8 @@
#include "fs.h"
#include "transaction.h"
#include "volumes.h"
+#include "bio.h"
+#include "ordered-data.h"
struct btrfs_dio_data {
ssize_t submitted;
@@ -184,7 +186,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
alloc_hint = btrfs_get_extent_allocation_hint(inode, start, len);
again:
ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
- 0, alloc_hint, &ins, 1, 1);
+ 0, alloc_hint, &ins, true, true);
if (ret == -EAGAIN) {
ASSERT(btrfs_is_zoned(fs_info));
wait_on_bit_io(&inode->root->fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH,
@@ -385,7 +387,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
* to allocate a contiguous array for the checksums.
*/
if (!write)
- len = min_t(u64, len, fs_info->sectorsize * BTRFS_MAX_BIO_SECTORS);
+ len = min_t(u64, len, fs_info->sectorsize * BIO_MAX_VECS);
lockstart = start;
lockend = start + len - 1;
@@ -713,10 +715,8 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
container_of(bbio, struct btrfs_dio_private, bbio);
struct btrfs_dio_data *dio_data = iter->private;
- btrfs_bio_init(bbio, BTRFS_I(iter->inode)->root->fs_info,
+ btrfs_bio_init(bbio, BTRFS_I(iter->inode), file_offset,
btrfs_dio_end_io, bio->bi_private);
- bbio->inode = BTRFS_I(iter->inode);
- bbio->file_offset = file_offset;
dip->file_offset = file_offset;
dip->bytes = bio->bi_iter.bi_size;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0aa7e5d1b05f..89149fac804c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -50,6 +50,7 @@
#include "relocation.h"
#include "scrub.h"
#include "super.h"
+#include "delayed-inode.h"
#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
BTRFS_HEADER_FLAG_RELOC |\
@@ -182,26 +183,33 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
int mirror_num)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
+ const u32 step = min(fs_info->nodesize, PAGE_SIZE);
+ const u32 nr_steps = eb->len / step;
+ phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
int ret = 0;
if (sb_rdonly(fs_info->sb))
return -EROFS;
- for (int i = 0; i < num_extent_folios(eb); i++) {
+ for (int i = 0; i < num_extent_pages(eb); i++) {
struct folio *folio = eb->folios[i];
- u64 start = max_t(u64, eb->start, folio_pos(folio));
- u64 end = min_t(u64, eb->start + eb->len,
- folio_pos(folio) + eb->folio_size);
- u32 len = end - start;
- phys_addr_t paddr = PFN_PHYS(folio_pfn(folio)) +
- offset_in_folio(folio, start);
-
- ret = btrfs_repair_io_failure(fs_info, 0, start, len, start,
- paddr, mirror_num);
- if (ret)
- break;
+
+ /* No large folio support yet. */
+ ASSERT(folio_order(folio) == 0);
+ ASSERT(i < nr_steps);
+
+ /*
+ * For nodesize < page size, there is just one paddr, with some
+ * offset inside the page.
+ *
+ * For nodesize >= page size, it's one or more paddrs, and eb->start
+ * must be aligned to page boundary.
+ */
+ paddrs[i] = page_to_phys(&folio->page) + offset_in_page(eb->start);
}
+ ret = btrfs_repair_io_failure(fs_info, 0, eb->start, eb->len, eb->start,
+ paddrs, step, mirror_num);
return ret;
}
@@ -398,10 +406,10 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
if (memcmp(result, header_csum, csum_size) != 0) {
btrfs_warn_rl(fs_info,
-"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d%s",
+"checksum verify failed on logical %llu mirror %u wanted " BTRFS_CSUM_FMT " found " BTRFS_CSUM_FMT " level %d%s",
eb->start, eb->read_mirror,
- CSUM_FMT_VALUE(csum_size, header_csum),
- CSUM_FMT_VALUE(csum_size, result),
+ BTRFS_CSUM_FMT_VALUE(csum_size, header_csum),
+ BTRFS_CSUM_FMT_VALUE(csum_size, result),
btrfs_header_level(eb),
ignore_csum ? ", ignored" : "");
if (unlikely(!ignore_csum)) {
@@ -644,20 +652,10 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
if (!root)
return NULL;
- memset(&root->root_key, 0, sizeof(root->root_key));
- memset(&root->root_item, 0, sizeof(root->root_item));
- memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
root->fs_info = fs_info;
root->root_key.objectid = objectid;
- root->node = NULL;
- root->commit_root = NULL;
- root->state = 0;
RB_CLEAR_NODE(&root->rb_node);
- btrfs_set_root_last_trans(root, 0);
- root->free_objectid = 0;
- root->nr_delalloc_inodes = 0;
- root->nr_ordered_extents = 0;
xa_init(&root->inodes);
xa_init(&root->delayed_nodes);
@@ -691,10 +689,7 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
refcount_set(&root->refs, 1);
atomic_set(&root->snapshot_force_cow, 0);
atomic_set(&root->nr_swapfiles, 0);
- btrfs_set_root_log_transid(root, 0);
root->log_transid_committed = -1;
- btrfs_set_root_last_log_commit(root, 0);
- root->anon_dev = 0;
if (!btrfs_is_testing(fs_info)) {
btrfs_extent_io_tree_init(fs_info, &root->dirty_log_pages,
IO_TREE_ROOT_DIRTY_LOG_PAGES);
@@ -1773,8 +1768,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
destroy_workqueue(fs_info->endio_workers);
if (fs_info->rmw_workers)
destroy_workqueue(fs_info->rmw_workers);
- if (fs_info->compressed_write_workers)
- destroy_workqueue(fs_info->compressed_write_workers);
btrfs_destroy_workqueue(fs_info->endio_write_workers);
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
btrfs_destroy_workqueue(fs_info->delayed_workers);
@@ -1986,8 +1979,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
fs_info->endio_write_workers =
btrfs_alloc_workqueue(fs_info, "endio-write", flags,
max_active, 2);
- fs_info->compressed_write_workers =
- alloc_workqueue("btrfs-compressed-write", flags, max_active);
fs_info->endio_freespace_worker =
btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
max_active, 0);
@@ -2003,7 +1994,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
if (!(fs_info->workers &&
fs_info->delalloc_workers && fs_info->flush_workers &&
fs_info->endio_workers && fs_info->endio_meta_workers &&
- fs_info->compressed_write_workers &&
fs_info->endio_write_workers &&
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->fixup_workers &&
@@ -3255,12 +3245,6 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
PAGE_SIZE, fs_info->sectorsize);
return -EINVAL;
}
- if (fs_info->sectorsize > PAGE_SIZE && btrfs_fs_incompat(fs_info, RAID56)) {
- btrfs_err(fs_info,
- "RAID56 is not supported for page size %lu with sectorsize %u",
- PAGE_SIZE, fs_info->sectorsize);
- return -EINVAL;
- }
/* This can be called by remount, we need to protect the super block. */
spin_lock(&fs_info->super_lock);
@@ -4290,7 +4274,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
/*
* When finishing a compressed write bio we schedule a work queue item
- * to finish an ordered extent - btrfs_finish_compressed_write_work()
+ * to finish an ordered extent - end_bbio_compressed_write()
* calls btrfs_finish_ordered_extent() which in turn does a call to
* btrfs_queue_ordered_fn(), and that queues the ordered extent
* completion either in the endio_write_workers work queue or in the
@@ -4298,7 +4282,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
* below, so before we flush them we must flush this queue for the
* workers of compressed writes.
*/
- flush_workqueue(fs_info->compressed_write_workers);
+ flush_workqueue(fs_info->endio_workers);
/*
* After we parked the cleaner kthread, ordered extents may have
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 57920f2c6fe4..5320da83d0cf 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -9,7 +9,8 @@
#include <linux/sizes.h>
#include <linux/compiler_types.h>
#include "ctree.h"
-#include "fs.h"
+#include "bio.h"
+#include "ordered-data.h"
struct block_device;
struct super_block;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index dc4ca98c3780..e4cae34620d1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -40,6 +40,7 @@
#include "orphan.h"
#include "tree-checker.h"
#include "raid-stripe-tree.h"
+#include "delayed-inode.h"
#undef SCRAMBLE_DELAYED_REFS
@@ -164,8 +165,8 @@ search_again:
if (unlikely(num_refs == 0)) {
ret = -EUCLEAN;
btrfs_err(fs_info,
- "unexpected zero reference count for extent item (%llu %u %llu)",
- key.objectid, key.type, key.offset);
+ "unexpected zero reference count for extent item " BTRFS_KEY_FMT,
+ BTRFS_KEY_FMT_VALUE(&key));
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -597,8 +598,8 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
num_refs = btrfs_shared_data_ref_count(leaf, ref2);
} else {
btrfs_err(trans->fs_info,
- "unrecognized backref key (%llu %u %llu)",
- key.objectid, key.type, key.offset);
+ "unrecognized backref key " BTRFS_KEY_FMT,
+ BTRFS_KEY_FMT_VALUE(&key));
btrfs_abort_transaction(trans, -EUCLEAN);
return -EUCLEAN;
}
@@ -788,7 +789,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
want = extent_ref_type(parent, owner);
if (insert) {
extra_size = btrfs_extent_inline_ref_size(want);
- path->search_for_extension = 1;
+ path->search_for_extension = true;
} else
extra_size = -1;
@@ -954,7 +955,7 @@ again:
if (!path->keep_locks) {
btrfs_release_path(path);
- path->keep_locks = 1;
+ path->keep_locks = true;
goto again;
}
@@ -975,11 +976,11 @@ out_no_entry:
*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
if (path->keep_locks) {
- path->keep_locks = 0;
+ path->keep_locks = false;
btrfs_unlock_up_safe(path, 1);
}
if (insert)
- path->search_for_extension = 0;
+ path->search_for_extension = false;
return ret;
}
@@ -1764,7 +1765,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
if (TRANS_ABORTED(trans)) {
if (insert_reserved) {
- btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
+ btrfs_pin_extent(trans, node->bytenr, node->num_bytes);
free_head_ref_squota_rsv(trans->fs_info, href);
}
return 0;
@@ -1783,7 +1784,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
else
BUG();
if (ret && insert_reserved)
- btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
+ btrfs_pin_extent(trans, node->bytenr, node->num_bytes);
if (ret < 0)
btrfs_err(trans->fs_info,
"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d",
@@ -1890,7 +1891,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
spin_unlock(&delayed_refs->lock);
if (head->must_insert_reserved) {
- btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
+ btrfs_pin_extent(trans, head->bytenr, head->num_bytes);
if (head->is_data) {
struct btrfs_root *csum_root;
@@ -2591,34 +2592,34 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
}
static int pin_down_extent(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *cache,
- u64 bytenr, u64 num_bytes, int reserved)
+ struct btrfs_block_group *bg,
+ u64 bytenr, u64 num_bytes, bool reserved)
{
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- cache->pinned += num_bytes;
- btrfs_space_info_update_bytes_pinned(cache->space_info, num_bytes);
- if (reserved) {
- cache->reserved -= num_bytes;
- cache->space_info->bytes_reserved -= num_bytes;
- }
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
+ struct btrfs_space_info *space_info = bg->space_info;
+ const u64 reserved_bytes = (reserved ? num_bytes : 0);
+
+ spin_lock(&space_info->lock);
+ spin_lock(&bg->lock);
+ bg->pinned += num_bytes;
+ bg->reserved -= reserved_bytes;
+ spin_unlock(&bg->lock);
+ space_info->bytes_reserved -= reserved_bytes;
+ btrfs_space_info_update_bytes_pinned(space_info, num_bytes);
+ spin_unlock(&space_info->lock);
btrfs_set_extent_bit(&trans->transaction->pinned_extents, bytenr,
bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
return 0;
}
-int btrfs_pin_extent(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, int reserved)
+int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes)
{
struct btrfs_block_group *cache;
cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
BUG_ON(!cache); /* Logic error */
- pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
+ pin_down_extent(trans, cache, bytenr, num_bytes, true);
btrfs_put_block_group(cache);
return 0;
@@ -2642,7 +2643,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- pin_down_extent(trans, cache, eb->start, eb->len, 0);
+ pin_down_extent(trans, cache, eb->start, eb->len, false);
/* remove us from the free space cache (if we're there at all) */
ret = btrfs_remove_free_space(cache, eb->start, eb->len);
@@ -2747,13 +2748,11 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
struct btrfs_free_cluster *cluster = NULL;
u64 total_unpinned = 0;
u64 empty_cluster = 0;
- bool readonly;
- int ret = 0;
while (start <= end) {
u64 len;
+ bool readonly;
- readonly = false;
if (!cache ||
start >= cache->start + cache->length) {
if (cache)
@@ -2762,8 +2761,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
cache = btrfs_lookup_block_group(fs_info, start);
if (unlikely(cache == NULL)) {
/* Logic error, something removed the block group. */
- ret = -EUCLEAN;
- goto out;
+ return -EUCLEAN;
}
cluster = fetch_cluster_info(fs_info,
@@ -2797,27 +2795,28 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
+ readonly = cache->ro;
cache->pinned -= len;
+ spin_unlock(&cache->lock);
+
btrfs_space_info_update_bytes_pinned(space_info, -len);
space_info->max_extent_size = 0;
- if (cache->ro) {
+
+ if (readonly) {
space_info->bytes_readonly += len;
- readonly = true;
} else if (btrfs_is_zoned(fs_info)) {
/* Need reset before reusing in a zoned block group */
btrfs_space_info_update_bytes_zone_unusable(space_info, len);
- readonly = true;
- }
- spin_unlock(&cache->lock);
- if (!readonly && return_free_space)
+ } else if (return_free_space) {
btrfs_return_free_space(space_info, len);
+ }
spin_unlock(&space_info->lock);
}
if (cache)
btrfs_put_block_group(cache);
-out:
- return ret;
+
+ return 0;
}
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
@@ -3086,7 +3085,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_key key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root *extent_root;
struct extent_buffer *leaf;
struct btrfs_extent_item *ei;
@@ -3121,7 +3120,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
node->bytenr, refs_to_drop);
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
if (is_data)
@@ -3166,15 +3165,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
abort_and_dump(trans, path,
"invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
path->slots[0]);
- ret = -EUCLEAN;
- goto out;
+ return -EUCLEAN;
}
/* Must be SHARED_* item, remove the backref first */
ret = remove_extent_backref(trans, extent_root, path,
NULL, refs_to_drop, is_data);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
btrfs_release_path(path);
@@ -3223,7 +3221,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
extent_slot = path->slots[0];
}
@@ -3232,10 +3230,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d",
bytenr, node->parent, node->ref_root, owner_objectid,
owner_offset, path->slots[0]);
- goto out;
+ return ret;
} else {
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
leaf = path->nodes[0];
@@ -3246,7 +3244,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
"unexpected extent item size, has %u expect >= %zu",
item_size, sizeof(*ei));
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
ei = btrfs_item_ptr(leaf, extent_slot,
struct btrfs_extent_item);
@@ -3260,8 +3258,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
key.objectid, key.type, key.offset,
path->slots[0], owner_objectid, item_size,
sizeof(*ei) + sizeof(*bi));
- ret = -EUCLEAN;
- goto out;
+ return -EUCLEAN;
}
bi = (struct btrfs_tree_block_info *)(ei + 1);
WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
@@ -3272,8 +3269,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
abort_and_dump(trans, path,
"trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
refs_to_drop, refs, bytenr, path->slots[0]);
- ret = -EUCLEAN;
- goto out;
+ return -EUCLEAN;
}
refs -= refs_to_drop;
@@ -3289,8 +3285,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
abort_and_dump(trans, path,
"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
path->slots[0]);
- ret = -EUCLEAN;
- goto out;
+ return -EUCLEAN;
}
} else {
btrfs_set_extent_refs(leaf, ei, refs);
@@ -3300,7 +3295,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
iref, refs_to_drop, is_data);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
}
} else {
@@ -3320,17 +3315,15 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
"invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
extent_data_ref_count(path, iref),
refs_to_drop, path->slots[0]);
- ret = -EUCLEAN;
- goto out;
+ return -EUCLEAN;
}
if (iref) {
if (unlikely(path->slots[0] != extent_slot)) {
abort_and_dump(trans, path,
-"invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
- key.objectid, key.type,
- key.offset, path->slots[0]);
- ret = -EUCLEAN;
- goto out;
+"invalid iref, extent item key " BTRFS_KEY_FMT " slot %u doesn't have wanted iref",
+ BTRFS_KEY_FMT_VALUE(&key),
+ path->slots[0]);
+ return -EUCLEAN;
}
} else {
/*
@@ -3343,8 +3336,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
abort_and_dump(trans, path,
"invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
path->slots[0]);
- ret = -EUCLEAN;
- goto out;
+ return -EUCLEAN;
}
path->slots[0] = extent_slot;
num_to_del = 2;
@@ -3365,7 +3357,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
num_to_del);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
btrfs_release_path(path);
@@ -3373,8 +3365,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
-out:
- btrfs_free_path(path);
return ret;
}
@@ -3483,7 +3473,7 @@ int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
bg = btrfs_lookup_block_group(fs_info, buf->start);
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
- pin_down_extent(trans, bg, buf->start, buf->len, 1);
+ pin_down_extent(trans, bg, buf->start, buf->len, true);
btrfs_put_block_group(bg);
goto out;
}
@@ -3507,7 +3497,7 @@ int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags)
|| btrfs_is_zoned(fs_info)) {
- pin_down_extent(trans, bg, buf->start, buf->len, 1);
+ pin_down_extent(trans, bg, buf->start, buf->len, true);
btrfs_put_block_group(bg);
goto out;
}
@@ -3537,7 +3527,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
* tree, just update pinning info and exit early.
*/
if (ref->ref_root == BTRFS_TREE_LOG_OBJECTID) {
- btrfs_pin_extent(trans, ref->bytenr, ref->num_bytes, 1);
+ btrfs_pin_extent(trans, ref->bytenr, ref->num_bytes);
ret = 0;
} else if (ref->type == BTRFS_REF_METADATA) {
ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
@@ -3588,15 +3578,14 @@ enum btrfs_loop_type {
};
static inline void
-btrfs_lock_block_group(struct btrfs_block_group *cache,
- int delalloc)
+btrfs_lock_block_group(struct btrfs_block_group *cache, bool delalloc)
{
if (delalloc)
down_read(&cache->data_rwsem);
}
static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
- int delalloc)
+ bool delalloc)
{
btrfs_get_block_group(cache);
if (delalloc)
@@ -3606,7 +3595,7 @@ static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
static struct btrfs_block_group *btrfs_lock_cluster(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
- int delalloc)
+ bool delalloc)
__acquires(&cluster->refill_lock)
{
struct btrfs_block_group *used_bg = NULL;
@@ -3643,8 +3632,7 @@ static struct btrfs_block_group *btrfs_lock_cluster(
}
static inline void
-btrfs_release_block_group(struct btrfs_block_group *cache,
- int delalloc)
+btrfs_release_block_group(struct btrfs_block_group *cache, bool delalloc)
{
if (delalloc)
up_read(&cache->data_rwsem);
@@ -4034,7 +4022,7 @@ static int do_allocation(struct btrfs_block_group *block_group,
static void release_block_group(struct btrfs_block_group *block_group,
struct find_free_extent_ctl *ffe_ctl,
- int delalloc)
+ bool delalloc)
{
switch (ffe_ctl->policy) {
case BTRFS_EXTENT_ALLOC_CLUSTERED:
@@ -4690,7 +4678,7 @@ loop:
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
- struct btrfs_key *ins, int is_data, int delalloc)
+ struct btrfs_key *ins, bool is_data, bool delalloc)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct find_free_extent_ctl ffe_ctl = {};
@@ -4735,8 +4723,7 @@ again:
"allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
flags, num_bytes, for_treelog, for_data_reloc);
if (sinfo)
- btrfs_dump_space_info(fs_info, sinfo,
- num_bytes, 1);
+ btrfs_dump_space_info(sinfo, num_bytes, 1);
}
}
@@ -4776,7 +4763,7 @@ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans,
return -ENOSPC;
}
- ret = pin_down_extent(trans, cache, eb->start, eb->len, 1);
+ ret = pin_down_extent(trans, cache, eb->start, eb->len, true);
btrfs_put_block_group(cache);
return ret;
}
@@ -5022,7 +5009,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
offset, ins, 1, root_objectid);
if (ret)
- btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
+ btrfs_pin_extent(trans, ins->objectid, ins->offset);
ret = btrfs_record_squota_delta(fs_info, &delta);
btrfs_put_block_group(block_group);
return ret;
@@ -5168,7 +5155,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
return ERR_CAST(block_rsv);
ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
- empty_size, hint, &ins, 0, 0);
+ empty_size, hint, &ins, false, false);
if (ret)
goto out_unuse;
@@ -6061,7 +6048,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, bool update_ref, bool for_reloc
struct btrfs_trans_handle *trans;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root_item *root_item = &root->root_item;
- struct walk_control *wc;
+ struct walk_control AUTO_KFREE(wc);
struct btrfs_key key;
const u64 rootid = btrfs_root_id(root);
int ret = 0;
@@ -6079,9 +6066,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, bool update_ref, bool for_reloc
wc = kzalloc(sizeof(*wc), GFP_NOFS);
if (!wc) {
- btrfs_free_path(path);
ret = -ENOMEM;
- goto out;
+ goto out_free;
}
/*
@@ -6291,7 +6277,6 @@ out_end_trans:
btrfs_end_transaction_throttle(trans);
out_free:
- kfree(wc);
btrfs_free_path(path);
out:
if (!ret && root_dropped) {
@@ -6334,7 +6319,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
BTRFS_PATH_AUTO_FREE(path);
- struct walk_control *wc;
+ struct walk_control AUTO_KFREE(wc);
int level;
int parent_level;
int ret = 0;
@@ -6373,18 +6358,17 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
while (1) {
ret = walk_down_tree(trans, root, path, wc);
if (ret < 0)
- break;
+ return ret;
ret = walk_up_tree(trans, root, path, wc, parent_level);
if (ret) {
- if (ret > 0)
- ret = 0;
+ if (ret < 0)
+ return ret;
break;
}
}
- kfree(wc);
- return ret;
+ return 0;
}
/*
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index e970ac42a871..71bb8109c969 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -30,7 +30,6 @@ struct find_free_extent_ctl {
u64 min_alloc_size;
u64 empty_size;
u64 flags;
- int delalloc;
/* Where to start the search inside the bg */
u64 search_start;
@@ -40,6 +39,7 @@ struct find_free_extent_ctl {
struct btrfs_free_cluster *last_ptr;
bool use_cluster;
+ bool delalloc;
bool have_caching_bg;
bool orig_have_caching_bg;
@@ -49,6 +49,16 @@ struct find_free_extent_ctl {
/* Allocation is called for data relocation */
bool for_data_reloc;
+ /*
+ * Set to true if we're retrying the allocation on this block group
+ * after waiting for caching progress. This ensures that we retry only
+ * once before moving on to another block group.
+ */
+ bool retry_uncached;
+
+ /* Whether or not the allocator is currently following a hint. */
+ bool hinted;
+
/* RAID index, converted from flags */
int index;
@@ -57,13 +67,6 @@ struct find_free_extent_ctl {
*/
int loop;
- /*
- * Set to true if we're retrying the allocation on this block group
- * after waiting for caching progress, this is so that we retry only
- * once before moving on to another block group.
- */
- bool retry_uncached;
-
/* If current block group is cached */
int cached;
@@ -82,9 +85,6 @@ struct find_free_extent_ctl {
/* Allocation policy */
enum btrfs_extent_allocation_policy policy;
- /* Whether or not the allocator is currently following a hint */
- bool hinted;
-
/* Size class of block groups to prefer in early loops */
enum btrfs_block_group_size_class size_class;
};
@@ -110,8 +110,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 offset, int metadata, u64 *refs, u64 *flags,
u64 *owner_root);
-int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
- int reserved);
+int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num);
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
const struct extent_buffer *eb);
int btrfs_exclude_logged_extents(struct extent_buffer *eb);
@@ -138,7 +137,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_key *ins);
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
- struct btrfs_key *ins, int is_data, int delalloc);
+ struct btrfs_key *ins, bool is_data, bool delalloc);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, bool full_backref);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 23273d0e6f22..629fd5af4286 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -333,7 +333,7 @@ static noinline int lock_delalloc_folios(struct inode *inode,
goto out;
}
range_start = max_t(u64, folio_pos(folio), start);
- range_len = min_t(u64, folio_end(folio), end + 1) - range_start;
+ range_len = min_t(u64, folio_next_pos(folio), end + 1) - range_start;
btrfs_folio_set_lock(fs_info, folio, range_start, range_len);
processed_end = range_start + range_len - 1;
@@ -374,8 +374,7 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
const u64 orig_start = *start;
const u64 orig_end = *end;
- /* The sanity tests may not set a valid fs_info. */
- u64 max_bytes = fs_info ? fs_info->max_extent_size : BTRFS_MAX_EXTENT_SIZE;
+ u64 max_bytes = fs_info->max_extent_size;
u64 delalloc_start;
u64 delalloc_end;
bool found;
@@ -387,7 +386,7 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
ASSERT(orig_end > orig_start);
/* The range should at least cover part of the folio */
- ASSERT(!(orig_start >= folio_end(locked_folio) ||
+ ASSERT(!(orig_start >= folio_next_pos(locked_folio) ||
orig_end <= folio_pos(locked_folio)));
again:
/* step one, find a bunch of delalloc bytes starting at start */
@@ -493,7 +492,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le
struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
ASSERT(folio_pos(folio) <= start &&
- start + len <= folio_end(folio));
+ start + len <= folio_next_pos(folio));
if (uptodate && btrfs_verify_folio(folio, start, len))
btrfs_folio_set_uptodate(fs_info, folio, start, len);
@@ -518,7 +517,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le
*/
static void end_bbio_data_write(struct btrfs_bio *bbio)
{
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct bio *bio = &bbio->bio;
int error = blk_status_to_errno(bio->bi_status);
struct folio_iter fi;
@@ -574,7 +573,7 @@ static void begin_folio_read(struct btrfs_fs_info *fs_info, struct folio *folio)
*/
static void end_bbio_data_read(struct btrfs_bio *bbio)
{
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct bio *bio = &bbio->bio;
struct folio_iter fi;
@@ -739,12 +738,10 @@ static void alloc_new_bio(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_bio *bbio;
- bbio = btrfs_bio_alloc(BIO_MAX_VECS, bio_ctrl->opf, fs_info,
- bio_ctrl->end_io_func, NULL);
+ bbio = btrfs_bio_alloc(BIO_MAX_VECS, bio_ctrl->opf, inode,
+ file_offset, bio_ctrl->end_io_func, NULL);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
bbio->bio.bi_write_hint = inode->vfs_inode.i_write_hint;
- bbio->inode = inode;
- bbio->file_offset = file_offset;
bio_ctrl->bbio = bbio;
bio_ctrl->len_to_oe_boundary = U32_MAX;
bio_ctrl->next_file_offset = file_offset;
@@ -1201,7 +1198,7 @@ static bool can_skip_one_ordered_range(struct btrfs_inode *inode,
* finished our folio read and unlocked the folio.
*/
if (btrfs_folio_test_dirty(fs_info, folio, cur, blocksize)) {
- u64 range_len = min(folio_end(folio),
+ u64 range_len = umin(folio_next_pos(folio),
ordered->file_offset + ordered->num_bytes) - cur;
ret = true;
@@ -1223,7 +1220,7 @@ static bool can_skip_one_ordered_range(struct btrfs_inode *inode,
* So we return true and update @next_ret to the OE/folio boundary.
*/
if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) {
- u64 range_len = min(folio_end(folio),
+ u64 range_len = umin(folio_next_pos(folio),
ordered->file_offset + ordered->num_bytes) - cur;
/*
@@ -1691,14 +1688,17 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
unsigned long range_bitmap = 0;
bool submitted_io = false;
int found_error = 0;
+ const u64 end = start + len;
const u64 folio_start = folio_pos(folio);
+ const u64 folio_end = folio_start + folio_size(folio);
const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
u64 cur;
int bit;
int ret = 0;
- ASSERT(start >= folio_start &&
- start + len <= folio_start + folio_size(folio));
+ ASSERT(start >= folio_start, "start=%llu folio_start=%llu", start, folio_start);
+ ASSERT(end <= folio_end, "start=%llu len=%u folio_start=%llu folio_size=%zu",
+ start, len, folio_start, folio_size(folio));
ret = btrfs_writepage_cow_fixup(folio);
if (ret == -EAGAIN) {
@@ -1714,7 +1714,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
return ret;
}
- for (cur = start; cur < start + len; cur += fs_info->sectorsize)
+ for (cur = start; cur < end; cur += fs_info->sectorsize)
set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap);
bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap,
blocks_per_folio);
@@ -1725,8 +1725,24 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits);
if (cur >= i_size) {
+ struct btrfs_ordered_extent *ordered;
+
+ ordered = btrfs_lookup_first_ordered_range(inode, cur,
+ folio_end - cur);
+ /*
+ * We have just run delalloc before getting here, so
+ * there must be an ordered extent.
+ */
+ ASSERT(ordered != NULL);
+ spin_lock(&inode->ordered_tree_lock);
+ set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
+ ordered->truncated_len = min(ordered->truncated_len,
+ cur - ordered->file_offset);
+ spin_unlock(&inode->ordered_tree_lock);
+ btrfs_put_ordered_extent(ordered);
+
btrfs_mark_ordered_io_finished(inode, folio, cur,
- start + len - cur, true);
+ end - cur, true);
/*
* This range is beyond i_size, thus we don't need to
* bother writing back.
@@ -1735,8 +1751,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
* writeback the sectors with subpage dirty bits,
* causing writeback without ordered extent.
*/
- btrfs_folio_clear_dirty(fs_info, folio, cur,
- start + len - cur);
+ btrfs_folio_clear_dirty(fs_info, folio, cur, end - cur);
break;
}
ret = submit_one_sector(inode, folio, cur, bio_ctrl, i_size);
@@ -1856,7 +1871,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
folio_size(folio), bio_ctrl, i_size);
if (ret == 1)
return 0;
- if (ret < 0)
+ if (unlikely(ret < 0))
btrfs_err_rl(fs_info,
"failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d",
btrfs_root_id(inode->root), btrfs_ino(inode),
@@ -2206,16 +2221,15 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
REQ_OP_WRITE | REQ_META | wbc_to_write_flags(wbc),
- eb->fs_info, end_bbio_meta_write, eb);
+ BTRFS_I(fs_info->btree_inode), eb->start,
+ end_bbio_meta_write, eb);
bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT;
bio_set_dev(&bbio->bio, fs_info->fs_devices->latest_dev->bdev);
wbc_init_bio(wbc, &bbio->bio);
- bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
- bbio->file_offset = eb->start;
for (int i = 0; i < num_extent_folios(eb); i++) {
struct folio *folio = eb->folios[i];
u64 range_start = max_t(u64, eb->start, folio_pos(folio));
- u32 range_len = min_t(u64, folio_end(folio),
+ u32 range_len = min_t(u64, folio_next_pos(folio),
eb->start + eb->len) - range_start;
folio_lock(folio);
@@ -2468,10 +2482,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
&BTRFS_I(inode)->runtime_flags))
wbc->tagged_writepages = 1;
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag = PAGECACHE_TAG_TOWRITE;
- else
- tag = PAGECACHE_TAG_DIRTY;
+ tag = wbc_to_tag(wbc);
retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
@@ -2627,7 +2638,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
continue;
}
- cur_end = min_t(u64, folio_end(folio) - 1, end);
+ cur_end = min_t(u64, folio_next_pos(folio) - 1, end);
cur_len = cur_end + 1 - cur;
ASSERT(folio_test_locked(folio));
@@ -3826,6 +3837,7 @@ static void end_bbio_meta_read(struct btrfs_bio *bbio)
int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
const struct btrfs_tree_parent_check *check)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_bio *bbio;
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
@@ -3859,16 +3871,14 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
refcount_inc(&eb->refs);
bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
- REQ_OP_READ | REQ_META, eb->fs_info,
- end_bbio_meta_read, eb);
+ REQ_OP_READ | REQ_META, BTRFS_I(fs_info->btree_inode),
+ eb->start, end_bbio_meta_read, eb);
bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT;
- bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
- bbio->file_offset = eb->start;
memcpy(&bbio->parent_check, check, sizeof(*check));
for (int i = 0; i < num_extent_folios(eb); i++) {
struct folio *folio = eb->folios[i];
u64 range_start = max_t(u64, eb->start, folio_pos(folio));
- u32 range_len = min_t(u64, folio_end(folio),
+ u32 range_len = min_t(u64, folio_next_pos(folio),
eb->start + eb->len) - range_start;
bio_add_folio_nofail(&bbio->bio, folio, range_len,
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5fcbfe44218c..02ebb2f238af 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -12,7 +12,6 @@
#include <linux/rwsem.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include "compression.h"
#include "messages.h"
#include "ulist.h"
#include "misc.h"
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index d4b81ee4d97b..6f685f3c9327 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -8,8 +8,7 @@
#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/refcount.h>
-#include "misc.h"
-#include "compression.h"
+#include "fs.h"
struct btrfs_inode;
struct btrfs_fs_info;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a42e6d54e7cd..14e5257f0f04 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -18,6 +18,7 @@
#include "fs.h"
#include "accessors.h"
#include "file-item.h"
+#include "volumes.h"
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) * 2) / \
@@ -372,7 +373,7 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
return -ENOMEM;
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
- bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
+ bbio->csum = kvcalloc(nblocks, csum_size, GFP_NOFS);
if (!bbio->csum)
return -ENOMEM;
} else {
@@ -393,8 +394,8 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
* between reading the free space cache and updating the csum tree.
*/
if (btrfs_is_free_space_inode(inode)) {
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
}
/*
@@ -422,8 +423,8 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
* from across transactions.
*/
if (bbio->csum_search_commit_root) {
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
down_read(&fs_info->commit_root_sem);
}
@@ -438,7 +439,7 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
if (count < 0) {
ret = count;
if (bbio->csum != bbio->csum_inline)
- kfree(bbio->csum);
+ kvfree(bbio->csum);
bbio->csum = NULL;
break;
}
@@ -764,21 +765,55 @@ fail:
return ret;
}
+/*
+ * Checksum the data blocks of @bbio covered by @src, storing the results
+ * back to back in bbio->sums->sums.
+ *
+ * @bbio: bio to checksum; bbio->sums must already point to the destination
+ * @src:  iterator describing the range to checksum; only a private copy of
+ *        it is walked
+ *
+ * The bio is walked in units of min(sectorsize, PAGE_SIZE).  Each unit's
+ * physical address is recorded in @paddrs, and every time a whole block has
+ * been collected it is passed to btrfs_calculate_block_csum_pages().
+ */
+static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src)
+{
+	struct btrfs_inode *inode = bbio->inode;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	struct bio *bio = &bbio->bio;
+	struct btrfs_ordered_sum *sums = bbio->sums;
+	struct bvec_iter iter = *src;
+	phys_addr_t paddr;
+	const u32 blocksize = fs_info->sectorsize;
+	const u32 step = min(blocksize, PAGE_SIZE);
+	const u32 nr_steps = blocksize / step;
+	phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
+	u32 offset = 0;
+	int index = 0;
+
+	btrfs_bio_for_each_block(paddr, bio, &iter, step) {
+		/* Slot of this step within the block currently being collected. */
+		paddrs[(offset / step) % nr_steps] = paddr;
+		offset += step;
+
+		/* A whole block has been gathered: emit its checksum. */
+		if (IS_ALIGNED(offset, blocksize)) {
+			btrfs_calculate_block_csum_pages(fs_info, paddrs, sums->sums + index);
+			index += fs_info->csum_size;
+		}
+	}
+}
+
+/*
+ * Work item callback for asynchronous checksumming: checksum the bio using
+ * the iterator stored in bbio->csum_saved_iter, then signal bbio->csum_done.
+ */
+static void csum_one_bio_work(struct work_struct *work)
+{
+	struct btrfs_bio *bbio;
+
+	bbio = container_of(work, struct btrfs_bio, csum_work);
+	ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
+	ASSERT(bbio->async_csum == true);
+
+	csum_one_bio(bbio, &bbio->csum_saved_iter);
+	complete(&bbio->csum_done);
+}
+
/*
* Calculate checksums of the data contained inside a bio.
*/
-int btrfs_csum_one_bio(struct btrfs_bio *bbio)
+int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async)
{
struct btrfs_ordered_extent *ordered = bbio->ordered;
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct bio *bio = &bbio->bio;
struct btrfs_ordered_sum *sums;
- struct bvec_iter iter = bio->bi_iter;
- phys_addr_t paddr;
- const u32 blocksize = fs_info->sectorsize;
- int index;
unsigned nofs_flag;
nofs_flag = memalloc_nofs_save();
@@ -789,21 +824,21 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio)
if (!sums)
return -ENOMEM;
+ sums->logical = bbio->orig_logical;
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
-
- sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
- index = 0;
-
- shash->tfm = fs_info->csum_shash;
-
- btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) {
- btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index);
- index += fs_info->csum_size;
- }
-
bbio->sums = sums;
btrfs_add_ordered_sum(ordered, sums);
+
+ if (!async) {
+ csum_one_bio(bbio, &bbio->bio.bi_iter);
+ return 0;
+ }
+ init_completion(&bbio->csum_done);
+ bbio->async_csum = true;
+ bbio->csum_saved_iter = bbio->bio.bi_iter;
+ INIT_WORK(&bbio->csum_work, csum_one_bio_work);
+ schedule_work(&bbio->csum_work);
return 0;
}
@@ -1142,10 +1177,10 @@ again:
}
btrfs_release_path(path);
- path->search_for_extension = 1;
+ path->search_for_extension = true;
ret = btrfs_search_slot(trans, root, &file_key, path,
csum_size, 1);
- path->search_for_extension = 0;
+ path->search_for_extension = false;
if (ret < 0)
goto out;
diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 63216c43676d..5645c5e3abdb 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -7,7 +7,7 @@
#include <linux/list.h>
#include <uapi/linux/btrfs_tree.h>
#include "ctree.h"
-#include "accessors.h"
+#include "ordered-data.h"
struct extent_map;
struct btrfs_file_extent_item;
@@ -64,7 +64,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct btrfs_bio *bbio);
+int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async);
int btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fa82def46e39..7a501e73d880 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -75,7 +75,7 @@ int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos
u64 num_bytes;
u64 start_pos;
u64 end_of_last_block;
- u64 end_pos = pos + write_bytes;
+ const u64 end_pos = pos + write_bytes;
loff_t isize = i_size_read(&inode->vfs_inode);
unsigned int extra_bits = 0;
@@ -86,10 +86,9 @@ int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos
extra_bits |= EXTENT_NORESERVE;
start_pos = round_down(pos, fs_info->sectorsize);
- num_bytes = round_up(write_bytes + pos - start_pos,
- fs_info->sectorsize);
+ num_bytes = round_up(end_pos - start_pos, fs_info->sectorsize);
ASSERT(num_bytes <= U32_MAX);
- ASSERT(folio_pos(folio) <= pos && folio_end(folio) >= pos + write_bytes);
+ ASSERT(folio_pos(folio) <= pos && folio_next_pos(folio) >= end_pos);
end_of_last_block = start_pos + num_bytes - 1;
@@ -799,7 +798,7 @@ static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64
u64 len)
{
u64 clamp_start = max_t(u64, pos, folio_pos(folio));
- u64 clamp_end = min_t(u64, pos + len, folio_end(folio));
+ u64 clamp_end = min_t(u64, pos + len, folio_next_pos(folio));
const u32 blocksize = inode_to_fs_info(inode)->sectorsize;
int ret = 0;
@@ -1254,8 +1253,8 @@ again:
* The reserved range goes beyond the current folio, shrink the reserved
* space to the folio boundary.
*/
- if (reserved_start + reserved_len > folio_end(folio)) {
- const u64 last_block = folio_end(folio);
+ if (reserved_start + reserved_len > folio_next_pos(folio)) {
+ const u64 last_block = folio_next_pos(folio);
shrink_reserved_space(inode, *data_reserved, reserved_start,
reserved_len, last_block - reserved_start,
@@ -1441,6 +1440,8 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
ssize_t num_written, num_sync;
+ if (unlikely(btrfs_is_shutdown(inode->root->fs_info)))
+ return -EIO;
/*
* If the fs flips readonly due to some impossible error, although we
* have opened a file as writable, we have to stop this write operation
@@ -2043,6 +2044,8 @@ static int btrfs_file_mmap_prepare(struct vm_area_desc *desc)
struct file *filp = desc->file;
struct address_space *mapping = filp->f_mapping;
+ if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(filp)))))
+ return -EIO;
if (!mapping->a_ops->read_folio)
return -ENOEXEC;
@@ -3112,6 +3115,9 @@ static long btrfs_fallocate(struct file *file, int mode,
int blocksize = BTRFS_I(inode)->root->fs_info->sectorsize;
int ret;
+ if (unlikely(btrfs_is_shutdown(inode_to_fs_info(inode))))
+ return -EIO;
+
/* Do not allow fallocate in ZONED mode */
if (btrfs_is_zoned(inode_to_fs_info(inode)))
return -EOPNOTSUPP;
@@ -3803,6 +3809,9 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
{
int ret;
+ if (unlikely(btrfs_is_shutdown(inode_to_fs_info(inode))))
+ return -EIO;
+
filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
ret = fsverity_file_open(inode, filp);
@@ -3815,6 +3824,9 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
ssize_t ret = 0;
+ if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(iocb->ki_filp)))))
+ return -EIO;
+
if (iocb->ki_flags & IOCB_DIRECT) {
ret = btrfs_direct_read(iocb, to);
if (ret < 0 || !iov_iter_count(to) ||
@@ -3825,10 +3837,20 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return filemap_read(iocb, to, ret);
}
+/*
+ * ->splice_read handler: fail with -EIO when btrfs_is_shutdown() reports the
+ * filesystem as shut down, otherwise hand off to filemap_splice_read().
+ */
+static ssize_t btrfs_file_splice_read(struct file *in, loff_t *ppos,
+				      struct pipe_inode_info *pipe,
+				      size_t len, unsigned int flags)
+{
+	struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(in));
+
+	if (unlikely(btrfs_is_shutdown(fs_info)))
+		return -EIO;
+
+	return filemap_splice_read(in, ppos, pipe, len, flags);
+}
+
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
.read_iter = btrfs_file_read_iter,
- .splice_read = filemap_splice_read,
+ .splice_read = btrfs_file_splice_read,
.write_iter = btrfs_file_write_iter,
.splice_write = iter_file_splice_write,
.mmap_prepare = btrfs_file_mmap_prepare,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ab873bd67192..f0f72850fab2 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -968,8 +968,8 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
path = btrfs_alloc_path();
if (!path)
return 0;
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
/*
* We must pass a path with search_commit_root set to btrfs_iget in
@@ -3656,7 +3656,7 @@ static int do_trimming(struct btrfs_block_group *block_group,
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
int ret;
- int update = 0;
+ bool bg_ro;
const u64 end = start + bytes;
const u64 reserved_end = reserved_start + reserved_bytes;
enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
@@ -3664,12 +3664,14 @@ static int do_trimming(struct btrfs_block_group *block_group,
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
- if (!block_group->ro) {
+ bg_ro = block_group->ro;
+ if (!bg_ro) {
block_group->reserved += reserved_bytes;
+ spin_unlock(&block_group->lock);
space_info->bytes_reserved += reserved_bytes;
- update = 1;
+ } else {
+ spin_unlock(&block_group->lock);
}
- spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
@@ -3690,14 +3692,16 @@ static int do_trimming(struct btrfs_block_group *block_group,
list_del(&trim_entry->list);
mutex_unlock(&ctl->cache_writeout_mutex);
- if (update) {
+ if (!bg_ro) {
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
- if (block_group->ro)
- space_info->bytes_readonly += reserved_bytes;
+ bg_ro = block_group->ro;
block_group->reserved -= reserved_bytes;
- space_info->bytes_reserved -= reserved_bytes;
spin_unlock(&block_group->lock);
+
+ space_info->bytes_reserved -= reserved_bytes;
+ if (bg_ro)
+ space_info->bytes_readonly += reserved_bytes;
spin_unlock(&space_info->lock);
}
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index d86541073d42..1ad2ad384b9e 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -165,11 +165,9 @@ static unsigned long *alloc_bitmap(u32 bitmap_size)
/*
* GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
- * into the filesystem as the free space bitmap can be modified in the
- * critical section of a transaction commit.
- *
- * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
- * know that recursion is unsafe.
+ * into the filesystem here. All callers hold a transaction handle
+ * open, so if a GFP_KERNEL allocation recurses into the filesystem
+ * and triggers a transaction commit, we would deadlock.
*/
nofs_flag = memalloc_nofs_save();
ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
@@ -218,11 +216,8 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
bitmap = alloc_bitmap(bitmap_size);
- if (unlikely(!bitmap)) {
- ret = -ENOMEM;
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
+ if (unlikely(!bitmap))
+ return 0;
start = block_group->start;
end = block_group->start + block_group->length;
@@ -361,11 +356,8 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
bitmap = alloc_bitmap(bitmap_size);
- if (unlikely(!bitmap)) {
- ret = -ENOMEM;
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
+ if (unlikely(!bitmap))
+ return 0;
start = block_group->start;
end = block_group->start + block_group->length;
@@ -841,7 +833,7 @@ int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
u64 start, u64 size)
{
struct btrfs_block_group *block_group;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int ret;
if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
@@ -851,7 +843,7 @@ int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
block_group = btrfs_lookup_block_group(trans->fs_info, start);
@@ -859,7 +851,7 @@ int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
DEBUG_WARN("no block group found for start=%llu", start);
ret = -ENOENT;
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
mutex_lock(&block_group->free_space_lock);
@@ -869,8 +861,7 @@ int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
btrfs_put_block_group(block_group);
-out:
- btrfs_free_path(path);
+
return ret;
}
@@ -1023,7 +1014,7 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
u64 start, u64 size)
{
struct btrfs_block_group *block_group;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int ret;
if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
@@ -1033,7 +1024,7 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
block_group = btrfs_lookup_block_group(trans->fs_info, start);
@@ -1041,7 +1032,7 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
DEBUG_WARN("no block group found for start=%llu", start);
ret = -ENOENT;
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
mutex_lock(&block_group->free_space_lock);
@@ -1051,8 +1042,7 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
btrfs_put_block_group(block_group);
-out:
- btrfs_free_path(path);
+
return ret;
}
@@ -1466,7 +1456,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group)
{
struct btrfs_root *root = btrfs_free_space_root(block_group);
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
u64 start, end;
@@ -1485,7 +1475,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
start = block_group->start;
@@ -1499,7 +1489,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
leaf = path->nodes[0];
@@ -1530,14 +1520,13 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
btrfs_release_path(path);
}
ret = 0;
-out:
- btrfs_free_path(path);
+
return ret;
}
@@ -1702,8 +1691,8 @@ int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
* Just like caching_thread() doesn't want to deadlock on the extent
* tree, we don't want to deadlock on the free space tree.
*/
- path->skip_locking = 1;
- path->search_commit_root = 1;
+ path->skip_locking = true;
+ path->search_commit_root = true;
path->reada = READA_FORWARD;
info = btrfs_search_free_space_info(NULL, block_group, path, 0);
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 814bbc9417d2..0f7e1ef27891 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -29,6 +29,7 @@
#include "extent-io-tree.h"
#include "async-thread.h"
#include "block-rsv.h"
+#include "messages.h"
struct inode;
struct super_block;
@@ -73,6 +74,13 @@ struct btrfs_space_info;
#define BTRFS_SUPER_INFO_SIZE 4096
static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
+/* Array of bytes with variable length, hexadecimal format 0x1234 */
+#define BTRFS_CSUM_FMT "0x%*phN"
+#define BTRFS_CSUM_FMT_VALUE(size, bytes) size, bytes
+
+#define BTRFS_KEY_FMT "(%llu %u %llu)"
+#define BTRFS_KEY_FMT_VALUE(key) (key)->objectid, (key)->type, (key)->offset
+
/*
* Number of metadata items necessary for an unlink operation:
*
@@ -124,6 +132,12 @@ enum {
/* No more delayed iput can be queued. */
BTRFS_FS_STATE_NO_DELAYED_IPUT,
+ /*
+ * Emergency shutdown: goes a step further than an aborted transaction
+ * by rejecting all operations.
+ */
+ BTRFS_FS_STATE_EMERGENCY_SHUTDOWN,
+
BTRFS_FS_STATE_COUNT
};
@@ -644,7 +658,6 @@ struct btrfs_fs_info {
struct workqueue_struct *endio_workers;
struct workqueue_struct *endio_meta_workers;
struct workqueue_struct *rmw_workers;
- struct workqueue_struct *compressed_write_workers;
struct btrfs_workqueue *endio_write_workers;
struct btrfs_workqueue *endio_freespace_worker;
struct btrfs_workqueue *caching_workers;
@@ -1120,6 +1133,27 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
(unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \
&(fs_info)->fs_state)))
+static inline bool btrfs_is_shutdown(struct btrfs_fs_info *fs_info)
+{
+ return test_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state);
+}
+
+static inline void btrfs_force_shutdown(struct btrfs_fs_info *fs_info)
+{
+ /*
+ * Here we do not want to use handle_fs_error(), which will mark the fs
+ * read-only.
+ * Some call sites like shutdown ioctl will mark the fs shutdown when
+ * the fs is frozen. But thaw path will handle RO and RW fs
+ * differently.
+ *
+ * So here we only mark the fs error without flipping it RO.
+ */
+ WRITE_ONCE(fs_info->fs_error, -EIO);
+ if (!test_and_set_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state))
+ btrfs_crit(fs_info, "emergency shutdown");
+}
+
/*
* We use folio flag owner_2 to indicate there is an ordered extent with
* unfinished IO.
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 1bd73b80f9fa..b73e1dd97208 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -312,7 +312,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- path->skip_release_on_error = 1;
+ path->skip_release_on_error = true;
ret = btrfs_insert_empty_item(trans, root, path, &key,
ins_len);
if (ret == -EEXIST) {
@@ -444,7 +444,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_truncate_control *control)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
@@ -730,6 +730,5 @@ out:
if (!ret && control->last_size > new_size)
control->last_size = new_size;
- btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6282911e536f..c4bee47829ed 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9,6 +9,7 @@
#include <linux/blk-cgroup.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/fs_struct.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
@@ -71,6 +72,7 @@
#include "backref.h"
#include "raid-stripe-tree.h"
#include "fiemap.h"
+#include "delayed-inode.h"
#define COW_FILE_RANGE_KEEP_LOCKED (1UL << 0)
#define COW_FILE_RANGE_NO_INLINE (1UL << 1)
@@ -130,7 +132,7 @@ static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
struct btrfs_fs_info *fs_info = warn->fs_info;
struct extent_buffer *eb;
struct btrfs_inode_item *inode_item;
- struct inode_fs_paths *ipath = NULL;
+ struct inode_fs_paths *ipath __free(inode_fs_paths) = NULL;
struct btrfs_root *local_root;
struct btrfs_key key;
unsigned int nofs_flag;
@@ -195,7 +197,6 @@ static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
}
btrfs_put_root(local_root);
- free_ipath(ipath);
return 0;
err:
@@ -203,7 +204,6 @@ err:
"checksum error at logical %llu mirror %u root %llu inode %llu offset %llu, path resolving failed with ret=%d",
warn->logical, warn->mirror_num, root, inum, offset, ret);
- free_ipath(ipath);
return ret;
}
@@ -235,21 +235,21 @@ static void print_data_reloc_error(const struct btrfs_inode *inode, u64 file_off
if (logical == U64_MAX) {
btrfs_warn_rl(fs_info, "has data reloc tree but no running relocation");
btrfs_warn_rl(fs_info,
-"csum failed root %lld ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
+"csum failed root %lld ino %llu off %llu csum " BTRFS_CSUM_FMT " expected csum " BTRFS_CSUM_FMT " mirror %d",
btrfs_root_id(inode->root), btrfs_ino(inode), file_off,
- CSUM_FMT_VALUE(csum_size, csum),
- CSUM_FMT_VALUE(csum_size, csum_expected),
+ BTRFS_CSUM_FMT_VALUE(csum_size, csum),
+ BTRFS_CSUM_FMT_VALUE(csum_size, csum_expected),
mirror_num);
return;
}
logical += file_off;
btrfs_warn_rl(fs_info,
-"csum failed root %lld ino %llu off %llu logical %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
+"csum failed root %lld ino %llu off %llu logical %llu csum " BTRFS_CSUM_FMT " expected csum " BTRFS_CSUM_FMT " mirror %d",
btrfs_root_id(inode->root),
btrfs_ino(inode), file_off, logical,
- CSUM_FMT_VALUE(csum_size, csum),
- CSUM_FMT_VALUE(csum_size, csum_expected),
+ BTRFS_CSUM_FMT_VALUE(csum_size, csum),
+ BTRFS_CSUM_FMT_VALUE(csum_size, csum_expected),
mirror_num);
ret = extent_from_logical(fs_info, logical, &path, &found_key, &flags);
@@ -320,19 +320,19 @@ static void __cold btrfs_print_data_csum_error(struct btrfs_inode *inode,
/* Output without objectid, which is more meaningful */
if (btrfs_root_id(root) >= BTRFS_LAST_FREE_OBJECTID) {
btrfs_warn_rl(root->fs_info,
-"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
+"csum failed root %lld ino %lld off %llu csum " BTRFS_CSUM_FMT " expected csum " BTRFS_CSUM_FMT " mirror %d",
btrfs_root_id(root), btrfs_ino(inode),
logical_start,
- CSUM_FMT_VALUE(csum_size, csum),
- CSUM_FMT_VALUE(csum_size, csum_expected),
+ BTRFS_CSUM_FMT_VALUE(csum_size, csum),
+ BTRFS_CSUM_FMT_VALUE(csum_size, csum_expected),
mirror_num);
} else {
btrfs_warn_rl(root->fs_info,
-"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
+"csum failed root %llu ino %llu off %llu csum " BTRFS_CSUM_FMT " expected csum " BTRFS_CSUM_FMT " mirror %d",
btrfs_root_id(root), btrfs_ino(inode),
logical_start,
- CSUM_FMT_VALUE(csum_size, csum),
- CSUM_FMT_VALUE(csum_size, csum_expected),
+ BTRFS_CSUM_FMT_VALUE(csum_size, csum),
+ BTRFS_CSUM_FMT_VALUE(csum_size, csum_expected),
mirror_num);
}
}
@@ -411,7 +411,7 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
continue;
}
- index = folio_end(folio) >> PAGE_SHIFT;
+ index = folio_next_index(folio);
/*
* Here we just clear all Ordered bits for every page in the
* range, then btrfs_mark_ordered_io_finished() will handle
@@ -593,6 +593,10 @@ static bool can_cow_file_range_inline(struct btrfs_inode *inode,
if (size < i_size_read(&inode->vfs_inode))
return false;
+ /* Encrypted file cannot be inlined. */
+ if (IS_ENCRYPTED(&inode->vfs_inode))
+ return false;
+
return true;
}
@@ -864,7 +868,7 @@ static void compress_file_range(struct btrfs_work *work)
u64 actual_end;
u64 i_size;
int ret = 0;
- struct folio **folios;
+ struct folio **folios = NULL;
unsigned long nr_folios;
unsigned long total_compressed = 0;
unsigned long total_in = 0;
@@ -873,6 +877,9 @@ static void compress_file_range(struct btrfs_work *work)
int compress_type = fs_info->compress_type;
int compress_level = fs_info->compress_level;
+ if (unlikely(btrfs_is_shutdown(fs_info)))
+ goto cleanup_and_bail_uncompressed;
+
inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
/*
@@ -1134,7 +1141,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
ret = btrfs_reserve_extent(root, async_extent->ram_size,
async_extent->compressed_size,
async_extent->compressed_size,
- 0, *alloc_hint, &ins, 1, 1);
+ 0, *alloc_hint, &ins, true, true);
if (ret) {
/*
* We can't reserve contiguous space for the compressed size.
@@ -1288,6 +1295,11 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
unsigned long page_ops;
int ret = 0;
+ if (unlikely(btrfs_is_shutdown(fs_info))) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
if (btrfs_is_free_space_inode(inode)) {
ret = -EINVAL;
goto out_unlock;
@@ -1352,7 +1364,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
ret = btrfs_reserve_extent(root, num_bytes, num_bytes,
min_alloc_size, 0, alloc_hint,
- &ins, 1, 1);
+ &ins, true, true);
if (ret == -EAGAIN) {
/*
* btrfs_reserve_extent only returns -EAGAIN for zoned
@@ -2006,7 +2018,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
- struct btrfs_path *path;
+ struct btrfs_path *path = NULL;
u64 cow_start = (u64)-1;
/*
* If not 0, represents the inclusive end of the last fallback_to_cow()
@@ -2036,6 +2048,10 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
*/
ASSERT(!btrfs_is_zoned(fs_info) || btrfs_is_data_reloc_root(root));
+ if (unlikely(btrfs_is_shutdown(fs_info))) {
+ ret = -EIO;
+ goto error;
+ }
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
@@ -2338,7 +2354,8 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol
* The range must cover part of the @locked_folio, or a return of 1
* can confuse the caller.
*/
- ASSERT(!(end <= folio_pos(locked_folio) || start >= folio_end(locked_folio)));
+ ASSERT(!(end <= folio_pos(locked_folio) ||
+ start >= folio_next_pos(locked_folio)));
if (should_nocow(inode, start, end)) {
ret = run_delalloc_nocow(inode, locked_folio, start, end);
@@ -2745,7 +2762,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
struct btrfs_inode *inode = fixup->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 page_start = folio_pos(folio);
- u64 page_end = folio_end(folio) - 1;
+ u64 page_end = folio_next_pos(folio) - 1;
int ret = 0;
bool free_delalloc_space = true;
@@ -3332,36 +3349,67 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
return btrfs_finish_one_ordered(ordered);
}
-void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr,
- u8 *dest)
+/*
+ * Calculate the checksum of an fs block at physical memory address @paddr,
+ * and save the result to @dest.
+ *
+ * The folio containing @paddr must be large enough to contain a full fs block.
+ */
+void btrfs_calculate_block_csum_folio(struct btrfs_fs_info *fs_info,
+ const phys_addr_t paddr, u8 *dest)
{
struct folio *folio = page_folio(phys_to_page(paddr));
const u32 blocksize = fs_info->sectorsize;
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+ const u32 step = min(blocksize, PAGE_SIZE);
+ const u32 nr_steps = blocksize / step;
+ phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
- shash->tfm = fs_info->csum_shash;
/* The full block must be inside the folio. */
ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio));
- if (folio_test_partial_kmap(folio)) {
- size_t cur = paddr;
+ for (int i = 0; i < nr_steps; i++) {
+ u32 pindex = offset_in_folio(folio, paddr + i * step) >> PAGE_SHIFT;
- crypto_shash_init(shash);
- while (cur < paddr + blocksize) {
- void *kaddr;
- size_t len = min(paddr + blocksize - cur,
- PAGE_SIZE - offset_in_page(cur));
+ /*
+ * For bs <= ps cases, we will only run the loop once, so the offset
+ * inside the page will only be added to paddrs[0].
+ *
+ * For bs > ps cases, the block must be page aligned, thus offset
+ * inside the page will always be 0.
+ */
+ paddrs[i] = page_to_phys(folio_page(folio, pindex)) + offset_in_page(paddr);
+ }
+ return btrfs_calculate_block_csum_pages(fs_info, paddrs, dest);
+}
- kaddr = kmap_local_folio(folio, offset_in_folio(folio, cur));
- crypto_shash_update(shash, kaddr, len);
- kunmap_local(kaddr);
- cur += len;
- }
- crypto_shash_final(shash, dest);
- } else {
- crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, dest);
+/*
+ * Calculate the checksum of a fs block backed by multiple noncontiguous pages
+ * at @paddrs[] and save the result to @dest.
+ *
+ * Each @paddrs[] entry must have min(blocksize, PAGE_SIZE) bytes left in its page.
+ */
+void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info,
+ const phys_addr_t paddrs[], u8 *dest)
+{
+ const u32 blocksize = fs_info->sectorsize;
+ const u32 step = min(blocksize, PAGE_SIZE);
+ const u32 nr_steps = blocksize / step;
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+
+ shash->tfm = fs_info->csum_shash;
+ crypto_shash_init(shash);
+ for (int i = 0; i < nr_steps; i++) {
+ const phys_addr_t paddr = paddrs[i];
+ void *kaddr;
+
+ ASSERT(offset_in_page(paddr) + step <= PAGE_SIZE);
+ kaddr = kmap_local_page(phys_to_page(paddr)) + offset_in_page(paddr);
+ crypto_shash_update(shash, kaddr, step);
+ kunmap_local(kaddr);
}
+ crypto_shash_final(shash, dest);
}
+
/*
* Verify the checksum for a single sector without any extra action that depend
* on the type of I/O.
@@ -3371,19 +3419,20 @@ void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr
int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
const u8 * const csum_expected)
{
- btrfs_calculate_block_csum(fs_info, paddr, csum);
+ btrfs_calculate_block_csum_folio(fs_info, paddr, csum);
if (unlikely(memcmp(csum, csum_expected, fs_info->csum_size) != 0))
return -EIO;
return 0;
}
/*
- * Verify the checksum of a single data sector.
+ * Verify the checksum of a single data sector, which can be scattered across
+ * different noncontiguous pages.
*
* @bbio: btrfs_io_bio which contains the csum
* @dev: device the sector is on
* @bio_offset: offset to the beginning of the bio (in bytes)
- * @bv: bio_vec to check
+ * @paddrs: physical addresses which back the fs block
*
* Check if the checksum on a data block is valid. When a checksum mismatch is
* detected, report the error and fill the corrupted range with zero.
@@ -3391,12 +3440,13 @@ int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8
* Return %true if the sector is ok or had no checksum to start with, else %false.
*/
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
- u32 bio_offset, phys_addr_t paddr)
+ u32 bio_offset, const phys_addr_t paddrs[])
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
const u32 blocksize = fs_info->sectorsize;
- struct folio *folio;
+ const u32 step = min(blocksize, PAGE_SIZE);
+ const u32 nr_steps = blocksize / step;
u64 file_offset = bbio->file_offset + bio_offset;
u64 end = file_offset + blocksize - 1;
u8 *csum_expected;
@@ -3416,7 +3466,8 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
csum_expected = bbio->csum + (bio_offset >> fs_info->sectorsize_bits) *
fs_info->csum_size;
- if (btrfs_check_block_csum(fs_info, paddr, csum, csum_expected))
+ btrfs_calculate_block_csum_pages(fs_info, paddrs, csum);
+ if (unlikely(memcmp(csum, csum_expected, fs_info->csum_size) != 0))
goto zeroit;
return true;
@@ -3425,9 +3476,8 @@ zeroit:
bbio->mirror_num);
if (dev)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
- folio = page_folio(phys_to_page(paddr));
- ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio));
- folio_zero_range(folio, offset_in_folio(folio, paddr), blocksize);
+ for (int i = 0; i < nr_steps; i++)
+ memzero_page(phys_to_page(paddrs[i]), offset_in_page(paddrs[i]), step);
return false;
}
@@ -3886,7 +3936,7 @@ static int btrfs_add_inode_to_root(struct btrfs_inode *inode, bool prealloc)
ASSERT(ret != -ENOMEM);
return ret;
} else if (existing) {
- WARN_ON(!(existing->vfs_inode.i_state & (I_WILL_FREE | I_FREEING)));
+ WARN_ON(!(inode_state_read_once(&existing->vfs_inode) & (I_WILL_FREE | I_FREEING)));
}
return 0;
@@ -4314,8 +4364,8 @@ skip_backref:
* operations on the log tree, increasing latency for applications.
*/
if (!rename_ctx) {
- btrfs_del_inode_ref_in_log(trans, root, name, inode, dir_ino);
- btrfs_del_dir_entries_in_log(trans, root, name, dir, index);
+ btrfs_del_inode_ref_in_log(trans, name, inode, dir);
+ btrfs_del_dir_entries_in_log(trans, name, dir, index);
}
/*
@@ -4414,7 +4464,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = dir->root;
struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
struct btrfs_key key;
@@ -4507,7 +4557,6 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
if (ret)
btrfs_abort_transaction(trans, ret);
out:
- btrfs_free_path(path);
fscrypt_free_filename(&fname);
return ret;
}
@@ -4857,7 +4906,7 @@ again:
*/
zero_start = max_t(u64, folio_pos(folio), start);
- zero_end = folio_end(folio);
+ zero_end = folio_next_pos(folio);
folio_zero_range(folio, zero_start - folio_pos(folio),
zero_end - zero_start);
@@ -5040,7 +5089,7 @@ again:
* not reach disk, it still affects our page caches.
*/
zero_start = max_t(u64, folio_pos(folio), start);
- zero_end = min_t(u64, folio_end(folio) - 1, end);
+ zero_end = min_t(u64, folio_next_pos(folio) - 1, end);
} else {
zero_start = max_t(u64, block_start, start);
zero_end = min_t(u64, block_end, end);
@@ -5363,7 +5412,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct rb_node *node;
- ASSERT(inode->i_state & I_FREEING);
+ ASSERT(inode_state_read_once(inode) & I_FREEING);
truncate_inode_pages_final(&inode->i_data);
btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (u64)-1, false);
@@ -5632,9 +5681,9 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry,
location->type != BTRFS_ROOT_ITEM_KEY)) {
ret = -EUCLEAN;
btrfs_warn(root->fs_info,
-"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
+"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location " BTRFS_KEY_FMT ")",
__func__, fname.disk_name.name, btrfs_ino(dir),
- location->objectid, location->type, location->offset);
+ BTRFS_KEY_FMT_VALUE(location));
}
if (!ret)
*type = btrfs_dir_ftype(path->nodes[0], di);
@@ -5801,7 +5850,7 @@ struct btrfs_inode *btrfs_iget_path(u64 ino, struct btrfs_root *root,
if (!inode)
return ERR_PTR(-ENOMEM);
- if (!(inode->vfs_inode.i_state & I_NEW))
+ if (!(inode_state_read_once(&inode->vfs_inode) & I_NEW))
return inode;
ret = btrfs_read_locked_inode(inode, path);
@@ -5825,7 +5874,7 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root)
if (!inode)
return ERR_PTR(-ENOMEM);
- if (!(inode->vfs_inode.i_state & I_NEW))
+ if (!(inode_state_read_once(&inode->vfs_inode) & I_NEW))
return inode;
path = btrfs_alloc_path();
@@ -5839,6 +5888,8 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root)
if (ret)
return ERR_PTR(ret);
+ if (S_ISDIR(inode->vfs_inode.i_mode))
+ inode->vfs_inode.i_opflags |= IOP_FASTPERM_MAY_EXEC;
unlock_new_inode(&inode->vfs_inode);
return inode;
}
@@ -6291,8 +6342,8 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode)
}
/*
- * This is a copy of file_update_time. We need this so we can return error on
- * ENOSPC for updating the inode in the case of file write and mmap writes.
+ * We need our own ->update_time so that we can return error on ENOSPC for
+ * updating the inode in the case of file write and mmap writes.
*/
static int btrfs_update_time(struct inode *inode, int flags)
{
@@ -6790,8 +6841,11 @@ static int btrfs_create_common(struct inode *dir, struct dentry *dentry,
}
ret = btrfs_create_new_inode(trans, &new_inode_args);
- if (!ret)
+ if (!ret) {
+ if (S_ISDIR(inode->i_mode))
+ inode->i_opflags |= IOP_FASTPERM_MAY_EXEC;
d_instantiate_new(dentry, inode);
+ }
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
@@ -7067,8 +7121,8 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
* point the commit_root has everything we need.
*/
if (btrfs_is_free_space_inode(inode)) {
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
}
ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
@@ -7481,7 +7535,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
u64 page_start = folio_pos(folio);
u64 page_end = page_start + folio_size(folio) - 1;
u64 cur;
- int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
+ int inode_evicting = inode_state_read_once(&inode->vfs_inode) & I_FREEING;
/*
* We have folio locked so no new ordered extent can be created on this
@@ -7578,11 +7632,11 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, &cached_state);
- spin_lock_irq(&inode->ordered_tree_lock);
+ spin_lock(&inode->ordered_tree_lock);
set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
ordered->truncated_len = min(ordered->truncated_len,
cur - ordered->file_offset);
- spin_unlock_irq(&inode->ordered_tree_lock);
+ spin_unlock(&inode->ordered_tree_lock);
/*
* If the ordered extent has finished, we're safe to delete all
@@ -7644,19 +7698,22 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
.ino = btrfs_ino(inode),
.min_type = BTRFS_EXTENT_DATA_KEY,
.clear_extent_range = true,
+ .new_size = inode->vfs_inode.i_size,
};
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv rsv;
int ret;
struct btrfs_trans_handle *trans;
- u64 mask = fs_info->sectorsize - 1;
const u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
+ const u64 lock_start = round_down(inode->vfs_inode.i_size, fs_info->sectorsize);
+ const u64 i_size_up = round_up(inode->vfs_inode.i_size, fs_info->sectorsize);
+
+ /* Our inode is locked and the i_size can't be changed concurrently. */
+ btrfs_assert_inode_locked(inode);
if (!skip_writeback) {
- ret = btrfs_wait_ordered_range(inode,
- inode->vfs_inode.i_size & (~mask),
- (u64)-1);
+ ret = btrfs_wait_ordered_range(inode, lock_start, (u64)-1);
if (ret)
return ret;
}
@@ -7720,19 +7777,14 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
while (1) {
struct extent_state *cached_state = NULL;
- const u64 new_size = inode->vfs_inode.i_size;
- const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize);
- control.new_size = new_size;
btrfs_lock_extent(&inode->io_tree, lock_start, (u64)-1, &cached_state);
/*
* We want to drop from the next block forward in case this new
* size is not block aligned since we will be keeping the last
* block of the extent just the way it is.
*/
- btrfs_drop_extent_map_range(inode,
- ALIGN(new_size, fs_info->sectorsize),
- (u64)-1, false);
+ btrfs_drop_extent_map_range(inode, i_size_up, (u64)-1, false);
ret = btrfs_truncate_inode_items(trans, root, &control);
@@ -8710,15 +8762,13 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
-static int start_delalloc_inodes(struct btrfs_root *root,
- struct writeback_control *wbc, bool snapshot,
- bool in_reclaim_context)
+static int start_delalloc_inodes(struct btrfs_root *root, long *nr_to_write,
+ bool snapshot, bool in_reclaim_context)
{
struct btrfs_delalloc_work *work, *next;
LIST_HEAD(works);
LIST_HEAD(splice);
int ret = 0;
- bool full_flush = wbc->nr_to_write == LONG_MAX;
mutex_lock(&root->delalloc_mutex);
spin_lock(&root->delalloc_lock);
@@ -8744,10 +8794,10 @@ static int start_delalloc_inodes(struct btrfs_root *root,
if (snapshot)
set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &inode->runtime_flags);
- if (full_flush) {
- work = btrfs_alloc_delalloc_work(&inode->vfs_inode);
+ if (nr_to_write == NULL) {
+ work = btrfs_alloc_delalloc_work(tmp_inode);
if (!work) {
- iput(&inode->vfs_inode);
+ iput(tmp_inode);
ret = -ENOMEM;
goto out;
}
@@ -8755,9 +8805,11 @@ static int start_delalloc_inodes(struct btrfs_root *root,
btrfs_queue_work(root->fs_info->flush_workers,
&work->work);
} else {
- ret = filemap_fdatawrite_wbc(inode->vfs_inode.i_mapping, wbc);
+ ret = filemap_flush_nr(tmp_inode->i_mapping,
+ nr_to_write);
btrfs_add_delayed_iput(inode);
- if (ret || wbc->nr_to_write <= 0)
+
+ if (ret || *nr_to_write <= 0)
goto out;
}
cond_resched();
@@ -8783,29 +8835,17 @@ out:
int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
{
- struct writeback_control wbc = {
- .nr_to_write = LONG_MAX,
- .sync_mode = WB_SYNC_NONE,
- .range_start = 0,
- .range_end = LLONG_MAX,
- };
struct btrfs_fs_info *fs_info = root->fs_info;
if (BTRFS_FS_ERROR(fs_info))
return -EROFS;
-
- return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
+ return start_delalloc_inodes(root, NULL, true, in_reclaim_context);
}
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
bool in_reclaim_context)
{
- struct writeback_control wbc = {
- .nr_to_write = nr,
- .sync_mode = WB_SYNC_NONE,
- .range_start = 0,
- .range_end = LLONG_MAX,
- };
+ long *nr_to_write = nr == LONG_MAX ? NULL : &nr;
struct btrfs_root *root;
LIST_HEAD(splice);
int ret;
@@ -8817,13 +8857,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
spin_lock(&fs_info->delalloc_root_lock);
list_splice_init(&fs_info->delalloc_roots, &splice);
while (!list_empty(&splice)) {
- /*
- * Reset nr_to_write here so we know that we're doing a full
- * flush.
- */
- if (nr == LONG_MAX)
- wbc.nr_to_write = LONG_MAX;
-
root = list_first_entry(&splice, struct btrfs_root,
delalloc_root);
root = btrfs_grab_root(root);
@@ -8832,9 +8865,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
&fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
- ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context);
+ ret = start_delalloc_inodes(root, nr_to_write, false,
+ in_reclaim_context);
btrfs_put_root(root);
- if (ret < 0 || wbc.nr_to_write <= 0)
+ if (ret < 0 || nr <= 0)
goto out;
spin_lock(&fs_info->delalloc_root_lock);
}
@@ -9064,7 +9098,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
*/
cur_bytes = min(cur_bytes, last_alloc);
ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
- min_size, 0, *alloc_hint, &ins, 1, 0);
+ min_size, 0, *alloc_hint, &ins, true, false);
if (ret)
break;
@@ -9170,6 +9204,11 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
min_size, actual_len, alloc_hint, trans);
}
+/*
+ * NOTE: in case you are adding MAY_EXEC check for directories:
+ * we are marking them with IOP_FASTPERM_MAY_EXEC, allowing path lookup to
+ * elide calls here.
+ */
static int btrfs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
@@ -9395,7 +9434,6 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
u64 disk_bytenr, u64 disk_io_size,
struct page **pages, void *uring_ctx)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_encoded_read_private *priv, sync_priv;
struct completion sync_reads;
unsigned long i = 0;
@@ -9420,10 +9458,9 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
priv->status = 0;
priv->uring_ctx = uring_ctx;
- bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
+ bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, inode, 0,
btrfs_encoded_read_endio, priv);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
- bbio->inode = inode;
do {
size_t bytes = min_t(u64, disk_io_size, PAGE_SIZE);
@@ -9432,10 +9469,9 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
refcount_inc(&priv->pending_refs);
btrfs_submit_bbio(bbio, 0);
- bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
+ bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, inode, 0,
btrfs_encoded_read_endio, priv);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
- bbio->inode = inode;
continue;
}
@@ -9826,8 +9862,6 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
}
for (;;) {
- struct btrfs_ordered_extent *ordered;
-
ret = btrfs_wait_ordered_range(inode, start, num_bytes);
if (ret)
goto out_folios;
@@ -9877,7 +9911,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
}
ret = btrfs_reserve_extent(root, disk_num_bytes, disk_num_bytes,
- disk_num_bytes, 0, 0, &ins, 1, 1);
+ disk_num_bytes, 0, 0, &ins, true, true);
if (ret)
goto out_delalloc_release;
extent_reserved = true;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8cb7d5a462ef..acb484546b1d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -503,7 +503,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_trans_handle *trans;
struct btrfs_key key;
- struct btrfs_root_item *root_item;
+ struct btrfs_root_item AUTO_KFREE(root_item);
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
struct btrfs_root *root = BTRFS_I(dir)->root;
@@ -527,20 +527,18 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
ret = btrfs_get_free_objectid(fs_info->tree_root, &objectid);
if (ret)
- goto out_root_item;
+ return ret;
/*
* Don't create subvolume whose level is not zero. Or qgroup will be
* screwed up since it assumes subvolume qgroup's level to be 0.
*/
- if (btrfs_qgroup_level(objectid)) {
- ret = -ENOSPC;
- goto out_root_item;
- }
+ if (btrfs_qgroup_level(objectid))
+ return -ENOSPC;
ret = get_anon_bdev(&anon_dev);
if (ret < 0)
- goto out_root_item;
+ return ret;
new_inode_args.inode = btrfs_new_subvol_inode(idmap, dir);
if (!new_inode_args.inode) {
@@ -692,8 +690,7 @@ out_inode:
out_anon_dev:
if (anon_dev)
free_anon_bdev(anon_dev);
-out_root_item:
- kfree(root_item);
+
return ret;
}
@@ -904,14 +901,9 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len);
int ret;
- ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
- if (ret == -EINTR)
- return ret;
-
- dentry = lookup_one(idmap, qname, parent);
- ret = PTR_ERR(dentry);
+ dentry = start_creating_killable(idmap, parent, qname);
if (IS_ERR(dentry))
- goto out_unlock;
+ return PTR_ERR(dentry);
ret = btrfs_may_create(idmap, dir, dentry);
if (ret)
@@ -940,9 +932,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
out_up_read:
up_read(&fs_info->subvol_sem);
out_dput:
- dput(dentry);
-out_unlock:
- btrfs_inode_unlock(BTRFS_I(dir), 0);
+ end_creating(dentry);
return ret;
}
@@ -1606,7 +1596,7 @@ static noinline int search_ioctl(struct btrfs_root *root,
{
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_key key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int ret;
int num_found = 0;
unsigned long sk_offset = 0;
@@ -1626,10 +1616,8 @@ static noinline int search_ioctl(struct btrfs_root *root,
} else {
/* Look up the root from the arguments. */
root = btrfs_get_fs_root(info, sk->tree_id, true);
- if (IS_ERR(root)) {
- btrfs_free_path(path);
+ if (IS_ERR(root))
return PTR_ERR(root);
- }
}
key.objectid = sk->min_objectid;
@@ -1663,7 +1651,6 @@ static noinline int search_ioctl(struct btrfs_root *root,
sk->nr_items = num_found;
btrfs_put_root(root);
- btrfs_free_path(path);
return ret;
}
@@ -1746,7 +1733,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
int total_len = 0;
struct btrfs_inode_ref *iref;
struct extent_buffer *l;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
name[0]='\0';
@@ -1807,7 +1794,6 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
ret = 0;
out:
btrfs_put_root(root);
- btrfs_free_path(path);
return ret;
}
@@ -1824,8 +1810,8 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
struct btrfs_inode_ref *iref;
struct btrfs_root_ref *rref;
struct btrfs_root *root = NULL;
- struct btrfs_path *path;
- struct btrfs_key key, key2;
+ BTRFS_PATH_AUTO_FREE(path);
+ struct btrfs_key key;
struct extent_buffer *leaf;
char *ptr;
int slot;
@@ -1845,10 +1831,8 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
root = btrfs_get_fs_root(fs_info, treeid, true);
- if (IS_ERR(root)) {
- ret = PTR_ERR(root);
- goto out;
- }
+ if (IS_ERR(root))
+ return PTR_ERR(root);
key.objectid = dirid;
key.type = BTRFS_INODE_REF_KEY;
@@ -1880,24 +1864,6 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
read_extent_buffer(leaf, ptr,
(unsigned long)(iref + 1), len);
- /* Check the read+exec permission of this directory */
- ret = btrfs_previous_item(root, path, dirid,
- BTRFS_INODE_ITEM_KEY);
- if (ret < 0) {
- goto out_put;
- } else if (ret > 0) {
- ret = -ENOENT;
- goto out_put;
- }
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- btrfs_item_key_to_cpu(leaf, &key2, slot);
- if (key2.objectid != dirid) {
- ret = -ENOENT;
- goto out_put;
- }
-
/*
* We don't need the path anymore, so release it and
* avoid deadlocks and lockdep warnings in case
@@ -1905,18 +1871,17 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
* btree and lock the same leaf.
*/
btrfs_release_path(path);
- temp_inode = btrfs_iget(key2.objectid, root);
+ temp_inode = btrfs_iget(key.offset, root);
if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode);
goto out_put;
}
+ /* Check the read+exec permission of this directory. */
ret = inode_permission(idmap, &temp_inode->vfs_inode,
MAY_READ | MAY_EXEC);
iput(&temp_inode->vfs_inode);
- if (ret) {
- ret = -EACCES;
+ if (ret)
goto out_put;
- }
if (key.offset == upper_limit)
break;
@@ -1942,12 +1907,10 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
key.type = BTRFS_ROOT_REF_KEY;
key.offset = args->treeid;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
- if (ret < 0) {
- goto out;
- } else if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ return -ENOENT;
leaf = path->nodes[0];
slot = path->slots[0];
@@ -1957,10 +1920,8 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
item_len = btrfs_item_size(leaf, slot);
/* Check if dirid in ROOT_REF corresponds to passed dirid */
rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
- if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) {
- ret = -EINVAL;
- goto out;
- }
+ if (args->dirid != btrfs_root_ref_dirid(leaf, rref))
+ return -EINVAL;
/* Copy subvolume's name */
item_off += sizeof(struct btrfs_root_ref);
@@ -1970,8 +1931,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
out_put:
btrfs_put_root(root);
-out:
- btrfs_free_path(path);
+
return ret;
}
@@ -2417,18 +2377,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto free_subvol_name;
}
- ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
- if (ret == -EINTR)
- goto free_subvol_name;
- dentry = lookup_one(idmap, &QSTR(subvol_name), parent);
+ dentry = start_removing_killable(idmap, parent, &QSTR(subvol_name));
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
- goto out_unlock_dir;
- }
-
- if (d_really_is_negative(dentry)) {
- ret = -ENOENT;
- goto out_dput;
+ goto out_end_removing;
}
inode = d_inode(dentry);
@@ -2449,7 +2401,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
*/
ret = -EPERM;
if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
- goto out_dput;
+ goto out_end_removing;
/*
* Do not allow deletion if the parent dir is the same
@@ -2460,21 +2412,21 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
*/
ret = -EINVAL;
if (root == dest)
- goto out_dput;
+ goto out_end_removing;
ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC);
if (ret)
- goto out_dput;
+ goto out_end_removing;
}
/* check if subvolume may be deleted by a user */
ret = btrfs_may_delete(idmap, dir, dentry, 1);
if (ret)
- goto out_dput;
+ goto out_end_removing;
if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
ret = -EINVAL;
- goto out_dput;
+ goto out_end_removing;
}
btrfs_inode_lock(BTRFS_I(inode), 0);
@@ -2483,10 +2435,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (!ret)
d_delete_notify(dir, dentry);
-out_dput:
- dput(dentry);
-out_unlock_dir:
- btrfs_inode_unlock(BTRFS_I(dir), 0);
+out_end_removing:
+ end_removing(dentry);
free_subvol_name:
kfree(subvol_name_ptr);
free_parent:
@@ -2956,7 +2906,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_space_args space_args = { 0 };
struct btrfs_ioctl_space_info space;
struct btrfs_ioctl_space_info *dest;
- struct btrfs_ioctl_space_info *dest_orig;
+ struct btrfs_ioctl_space_info AUTO_KFREE(dest_orig);
struct btrfs_ioctl_space_info __user *user_dest;
struct btrfs_space_info *info;
static const u64 types[] = {
@@ -3077,9 +3027,8 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
(arg + sizeof(struct btrfs_ioctl_space_args));
if (copy_to_user(user_dest, dest_orig, alloc_size))
- ret = -EFAULT;
+ return -EFAULT;
- kfree(dest_orig);
out:
if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
ret = -EFAULT;
@@ -3298,7 +3247,7 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
u64 rel_ptr;
int size;
struct btrfs_ioctl_ino_path_args *ipa = NULL;
- struct inode_fs_paths *ipath = NULL;
+ struct inode_fs_paths *ipath __free(inode_fs_paths) = NULL;
struct btrfs_path *path;
if (!capable(CAP_DAC_READ_SEARCH))
@@ -3346,7 +3295,6 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
out:
btrfs_free_path(path);
- free_ipath(ipath);
kfree(ipa);
return ret;
@@ -3611,7 +3559,7 @@ static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info *fs_info, int cmd)
static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
void __user *arg)
{
- struct btrfs_ioctl_balance_args *bargs;
+ struct btrfs_ioctl_balance_args AUTO_KFREE(bargs);
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
@@ -3633,8 +3581,6 @@ static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
-
- kfree(bargs);
out:
mutex_unlock(&fs_info->balance_mutex);
return ret;
@@ -4228,7 +4174,7 @@ static int check_feature_bits(const struct btrfs_fs_info *fs_info,
u64 safe_set, u64 safe_clear)
{
const char *type = btrfs_feature_set_name(set);
- char *names;
+ const char AUTO_KFREE(names);
u64 disallowed, unsupported;
u64 set_mask = flags & change_mask;
u64 clear_mask = ~flags & change_mask;
@@ -4236,12 +4182,11 @@ static int check_feature_bits(const struct btrfs_fs_info *fs_info,
unsupported = set_mask & ~supported_flags;
if (unsupported) {
names = btrfs_printable_features(set, unsupported);
- if (names) {
+ if (names)
btrfs_warn(fs_info,
"this kernel does not support the %s feature bit%s",
names, strchr(names, ',') ? "s" : "");
- kfree(names);
- } else
+ else
btrfs_warn(fs_info,
"this kernel does not support %s bits 0x%llx",
type, unsupported);
@@ -4251,12 +4196,11 @@ static int check_feature_bits(const struct btrfs_fs_info *fs_info,
disallowed = set_mask & ~safe_set;
if (disallowed) {
names = btrfs_printable_features(set, disallowed);
- if (names) {
+ if (names)
btrfs_warn(fs_info,
"can't set the %s feature bit%s while mounted",
names, strchr(names, ',') ? "s" : "");
- kfree(names);
- } else
+ else
btrfs_warn(fs_info,
"can't set %s bits 0x%llx while mounted",
type, disallowed);
@@ -4266,12 +4210,11 @@ static int check_feature_bits(const struct btrfs_fs_info *fs_info,
disallowed = clear_mask & ~safe_clear;
if (disallowed) {
names = btrfs_printable_features(set, disallowed);
- if (names) {
+ if (names)
btrfs_warn(fs_info,
"can't clear the %s feature bit%s while mounted",
names, strchr(names, ',') ? "s" : "");
- kfree(names);
- } else
+ else
btrfs_warn(fs_info,
"can't clear %s bits 0x%llx while mounted",
type, disallowed);
@@ -4418,10 +4361,6 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
goto out_acct;
}
- if (fs_info->sectorsize > PAGE_SIZE) {
- ret = -ENOTTY;
- goto out_acct;
- }
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_encoded_io_args_32 args32;
@@ -4513,7 +4452,6 @@ out_acct:
static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat)
{
- struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode);
struct btrfs_ioctl_encoded_io_args args;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
@@ -4527,11 +4465,6 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
goto out_acct;
}
- if (fs_info->sectorsize > PAGE_SIZE) {
- ret = -ENOTTY;
- goto out_acct;
- }
-
if (!(file->f_mode & FMODE_WRITE)) {
ret = -EBADF;
goto out_acct;
@@ -4649,8 +4582,9 @@ struct io_btrfs_cmd {
struct btrfs_uring_priv *priv;
};
-static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int issue_flags)
+static void btrfs_uring_read_finished(struct io_tw_req tw_req, io_tw_token_t tw)
{
+ struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(cmd, struct io_btrfs_cmd);
struct btrfs_uring_priv *priv = bc->priv;
struct btrfs_inode *inode = BTRFS_I(file_inode(priv->iocb.ki_filp));
@@ -4695,7 +4629,7 @@ out:
btrfs_unlock_extent(io_tree, priv->start, priv->lockend, &priv->cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
- io_uring_cmd_done(cmd, ret, issue_flags);
+ io_uring_cmd_done(cmd, ret, IO_URING_CMD_TASK_WORK_ISSUE_FLAGS);
add_rchar(current, ret);
for (index = 0; index < priv->nr_pages; index++)
@@ -4813,11 +4747,6 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
ret = -EPERM;
goto out_acct;
}
- if (fs_info->sectorsize > PAGE_SIZE) {
- ret = -ENOTTY;
- goto out_acct;
- }
-
sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
if (issue_flags & IO_URING_F_COMPAT) {
@@ -4945,7 +4874,6 @@ out_acct:
static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
struct file *file = cmd->file;
- struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode);
loff_t pos;
struct kiocb kiocb;
ssize_t ret;
@@ -4960,11 +4888,6 @@ static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issu
ret = -EPERM;
goto out_acct;
}
- if (fs_info->sectorsize > PAGE_SIZE) {
- ret = -ENOTTY;
- goto out_acct;
- }
-
sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
if (!(file->f_mode & FMODE_WRITE)) {
@@ -5077,6 +5000,9 @@ out_acct:
int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
+ if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(cmd->file)))))
+ return -EIO;
+
switch (cmd->cmd_op) {
case BTRFS_IOC_ENCODED_READ:
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
@@ -5220,6 +5146,43 @@ static int btrfs_ioctl_subvol_sync(struct btrfs_fs_info *fs_info, void __user *a
return 0;
}
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+static int btrfs_ioctl_shutdown(struct btrfs_fs_info *fs_info, unsigned long arg)
+{
+ int ret = 0;
+ u32 flags;
+ /*
+  * Handler for BTRFS_IOC_SHUTDOWN: force the filesystem into the shutdown
+  * state, using the mode given by the u32 value that @arg points to in
+  * user space.
+  *
+  * Returns 0 on success or if the filesystem is already shut down,
+  * -EPERM without CAP_SYS_ADMIN, -EFAULT if the mode cannot be read from
+  * user space, -EINVAL for a mode at or above BTRFS_SHUTDOWN_FLAGS_LAST,
+  * or the error returned by freeze_super()/thaw_super().
+  */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ /* @arg is a user pointer to the u32 shutdown mode. */
+ if (get_user(flags, (u32 __user *)arg))
+ return -EFAULT;
+ /* Reject any value outside the range of known shutdown modes. */
+ if (flags >= BTRFS_SHUTDOWN_FLAGS_LAST)
+ return -EINVAL;
+ /* Nothing to do if a shutdown already happened; report success. */
+ if (btrfs_is_shutdown(fs_info))
+ return 0;
+ /* No default case: an in-range mode not listed below returns 0 untouched. */
+ switch (flags) {
+ case BTRFS_SHUTDOWN_FLAGS_LOGFLUSH:
+ case BTRFS_SHUTDOWN_FLAGS_DEFAULT: /* both modes share the freeze/thaw path */
+ ret = freeze_super(fs_info->sb, FREEZE_HOLDER_KERNEL, NULL); /* NOTE(review): presumably frozen first so pending work is written out - confirm */
+ if (ret)
+ return ret;
+ btrfs_force_shutdown(fs_info);
+ ret = thaw_super(fs_info->sb, FREEZE_HOLDER_KERNEL, NULL);
+ if (ret) /* the forced shutdown above has already run at this point */
+ return ret;
+ break;
+ case BTRFS_SHUTDOWN_FLAGS_NOLOGFLUSH: /* shut down directly, without freezing */
+ btrfs_force_shutdown(fs_info);
+ break;
+ }
+ return ret;
+}
+#endif
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@@ -5375,6 +5338,10 @@ long btrfs_ioctl(struct file *file, unsigned int
#endif
case BTRFS_IOC_SUBVOL_SYNC_WAIT:
return btrfs_ioctl_subvol_sync(fs_info, argp);
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ case BTRFS_IOC_SHUTDOWN:
+ return btrfs_ioctl_shutdown(fs_info, arg);
+#endif
}
return -ENOTTY;
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index a0cf8effe008..2f853de44473 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -24,6 +24,7 @@ static const char fs_state_chars[] = {
[BTRFS_FS_STATE_NO_DATA_CSUMS] = 'C',
[BTRFS_FS_STATE_SKIP_META_CSUMS] = 'S',
[BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L',
+ [BTRFS_FS_STATE_EMERGENCY_SHUTDOWN] = 'E',
};
static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h
index 4416c165644f..d8c0bd17dcda 100644
--- a/fs/btrfs/messages.h
+++ b/fs/btrfs/messages.h
@@ -168,7 +168,8 @@ do { \
#endif
#else
-#define ASSERT(cond, args...) (void)(cond)
+/* Compile check the @cond expression but don't generate any code. */
+#define ASSERT(cond, args...) BUILD_BUG_ON_INVALID(cond)
#endif
#ifdef CONFIG_BTRFS_DEBUG
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 60f9b000d644..12c5a9d6564f 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -14,6 +14,13 @@
#include <linux/bio.h>
/*
+ * Convenience macros to define a pointer with the __free(kfree) and
+ * __free(kvfree) cleanup attributes and initialized to NULL.
+ *
+ * The expansion supplies the '*' itself, so the macro is written after the
+ * pointee type rather than after a pointer type. For example
+ * "struct foo AUTO_KFREE(ptr);" expands to
+ * "struct foo *ptr __free(kfree) = NULL;".
+ */
+#define AUTO_KFREE(name) *name __free(kfree) = NULL
+#define AUTO_KVFREE(name) *name __free(kvfree) = NULL
+
+/*
* Enumerate bits using enum autoincrement. Define the @name as the n-th bit.
*/
#define ENUM_BIT(name) \
@@ -209,9 +216,4 @@ static inline bool bitmap_test_range_all_zero(const unsigned long *addr,
return (found_set == start + nbits);
}
-static inline u64 folio_end(struct folio *folio)
-{
- return folio_pos(folio) + folio_size(folio);
-}
-
#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2829f20d7bb5..5df02c707aee 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -237,14 +237,14 @@ static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
/* One ref for the tree. */
refcount_inc(&entry->refs);
- spin_lock_irq(&inode->ordered_tree_lock);
+ spin_lock(&inode->ordered_tree_lock);
node = tree_insert(&inode->ordered_tree, entry->file_offset,
&entry->rb_node);
if (unlikely(node))
btrfs_panic(fs_info, -EEXIST,
"inconsistency in ordered tree at offset %llu",
entry->file_offset);
- spin_unlock_irq(&inode->ordered_tree_lock);
+ spin_unlock(&inode->ordered_tree_lock);
spin_lock(&root->ordered_extent_lock);
list_add_tail(&entry->root_extent_list,
@@ -328,9 +328,9 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
{
struct btrfs_inode *inode = entry->inode;
- spin_lock_irq(&inode->ordered_tree_lock);
+ spin_lock(&inode->ordered_tree_lock);
list_add_tail(&sum->list, &entry->list);
- spin_unlock_irq(&inode->ordered_tree_lock);
+ spin_unlock(&inode->ordered_tree_lock);
}
void btrfs_mark_ordered_extent_error(struct btrfs_ordered_extent *ordered)
@@ -359,7 +359,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
if (folio) {
ASSERT(folio->mapping);
ASSERT(folio_pos(folio) <= file_offset);
- ASSERT(file_offset + len <= folio_end(folio));
+ ASSERT(file_offset + len <= folio_next_pos(folio));
/*
* Ordered flag indicates whether we still have
@@ -417,15 +417,14 @@ void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
bool uptodate)
{
struct btrfs_inode *inode = ordered->inode;
- unsigned long flags;
bool ret;
trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate);
- spin_lock_irqsave(&inode->ordered_tree_lock, flags);
+ spin_lock(&inode->ordered_tree_lock);
ret = can_finish_ordered_extent(ordered, folio, file_offset, len,
uptodate);
- spin_unlock_irqrestore(&inode->ordered_tree_lock, flags);
+ spin_unlock(&inode->ordered_tree_lock);
/*
* If this is a COW write it means we created new extent maps for the
@@ -481,18 +480,16 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
{
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
- unsigned long flags;
u64 cur = file_offset;
+ const u64 end = file_offset + num_bytes;
- trace_btrfs_writepage_end_io_hook(inode, file_offset,
- file_offset + num_bytes - 1,
- uptodate);
+ trace_btrfs_writepage_end_io_hook(inode, file_offset, end - 1, uptodate);
- spin_lock_irqsave(&inode->ordered_tree_lock, flags);
- while (cur < file_offset + num_bytes) {
+ spin_lock(&inode->ordered_tree_lock);
+ while (cur < end) {
u64 entry_end;
- u64 end;
- u32 len;
+ u64 this_end;
+ u64 len;
node = ordered_tree_search(inode, cur);
/* No ordered extents at all */
@@ -535,19 +532,18 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
* |
* cur
*/
- end = min(entry->file_offset + entry->num_bytes,
- file_offset + num_bytes) - 1;
- ASSERT(end + 1 - cur < U32_MAX);
- len = end + 1 - cur;
+ this_end = min(entry_end, end);
+ len = this_end - cur;
+ ASSERT(len < U32_MAX);
if (can_finish_ordered_extent(entry, folio, cur, len, uptodate)) {
- spin_unlock_irqrestore(&inode->ordered_tree_lock, flags);
+ spin_unlock(&inode->ordered_tree_lock);
btrfs_queue_ordered_fn(entry);
- spin_lock_irqsave(&inode->ordered_tree_lock, flags);
+ spin_lock(&inode->ordered_tree_lock);
}
cur += len;
}
- spin_unlock_irqrestore(&inode->ordered_tree_lock, flags);
+ spin_unlock(&inode->ordered_tree_lock);
}
/*
@@ -573,10 +569,9 @@ bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
{
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
- unsigned long flags;
bool finished = false;
- spin_lock_irqsave(&inode->ordered_tree_lock, flags);
+ spin_lock(&inode->ordered_tree_lock);
if (cached && *cached) {
entry = *cached;
goto have_entry;
@@ -613,7 +608,7 @@ out:
refcount_inc(&entry->refs);
trace_btrfs_ordered_extent_dec_test_pending(inode, entry);
}
- spin_unlock_irqrestore(&inode->ordered_tree_lock, flags);
+ spin_unlock(&inode->ordered_tree_lock);
return finished;
}
@@ -678,7 +673,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
fs_info->delalloc_batch);
- spin_lock_irq(&btrfs_inode->ordered_tree_lock);
+ spin_lock(&btrfs_inode->ordered_tree_lock);
node = &entry->rb_node;
rb_erase(node, &btrfs_inode->ordered_tree);
RB_CLEAR_NODE(node);
@@ -686,7 +681,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
btrfs_inode->ordered_tree_last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
pending = test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags);
- spin_unlock_irq(&btrfs_inode->ordered_tree_lock);
+ spin_unlock(&btrfs_inode->ordered_tree_lock);
/*
* The current running transaction is waiting on us, we need to let it
@@ -971,9 +966,8 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *ino
{
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
- unsigned long flags;
- spin_lock_irqsave(&inode->ordered_tree_lock, flags);
+ spin_lock(&inode->ordered_tree_lock);
node = ordered_tree_search(inode, file_offset);
if (!node)
goto out;
@@ -986,7 +980,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *ino
trace_btrfs_ordered_extent_lookup(inode, entry);
}
out:
- spin_unlock_irqrestore(&inode->ordered_tree_lock, flags);
+ spin_unlock(&inode->ordered_tree_lock);
return entry;
}
@@ -999,7 +993,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
- spin_lock_irq(&inode->ordered_tree_lock);
+ spin_lock(&inode->ordered_tree_lock);
node = ordered_tree_search(inode, file_offset);
if (!node) {
node = ordered_tree_search(inode, file_offset + len);
@@ -1026,7 +1020,7 @@ out:
refcount_inc(&entry->refs);
trace_btrfs_ordered_extent_lookup_range(inode, entry);
}
- spin_unlock_irq(&inode->ordered_tree_lock);
+ spin_unlock(&inode->ordered_tree_lock);
return entry;
}
@@ -1041,7 +1035,7 @@ void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
btrfs_assert_inode_locked(inode);
- spin_lock_irq(&inode->ordered_tree_lock);
+ spin_lock(&inode->ordered_tree_lock);
for (n = rb_first(&inode->ordered_tree); n; n = rb_next(n)) {
struct btrfs_ordered_extent *ordered;
@@ -1055,7 +1049,7 @@ void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
refcount_inc(&ordered->refs);
trace_btrfs_ordered_extent_lookup_for_logging(inode, ordered);
}
- spin_unlock_irq(&inode->ordered_tree_lock);
+ spin_unlock(&inode->ordered_tree_lock);
}
/*
@@ -1068,7 +1062,7 @@ btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset)
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
- spin_lock_irq(&inode->ordered_tree_lock);
+ spin_lock(&inode->ordered_tree_lock);
node = ordered_tree_search(inode, file_offset);
if (!node)
goto out;
@@ -1077,7 +1071,7 @@ btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset)
refcount_inc(&entry->refs);
trace_btrfs_ordered_extent_lookup_first(inode, entry);
out:
- spin_unlock_irq(&inode->ordered_tree_lock);
+ spin_unlock(&inode->ordered_tree_lock);
return entry;
}
@@ -1099,7 +1093,7 @@ struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range(
struct rb_node *next;
struct btrfs_ordered_extent *entry = NULL;
- spin_lock_irq(&inode->ordered_tree_lock);
+ spin_lock(&inode->ordered_tree_lock);
node = inode->ordered_tree.rb_node;
/*
* Here we don't want to use tree_search() which will use tree->last
@@ -1154,7 +1148,7 @@ out:
trace_btrfs_ordered_extent_lookup_first_range(inode, entry);
}
- spin_unlock_irq(&inode->ordered_tree_lock);
+ spin_unlock(&inode->ordered_tree_lock);
return entry;
}
@@ -1286,9 +1280,7 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
/*
* Take the root's ordered_extent_lock to avoid a race with
* btrfs_wait_ordered_extents() when updating the disk_bytenr and
- * disk_num_bytes fields of the ordered extent below. And we disable
- * IRQs because the inode's ordered_tree_lock is used in IRQ context
- * elsewhere.
+ * disk_num_bytes fields of the ordered extent below.
*
* There's no concern about a previous caller of
* btrfs_wait_ordered_extents() getting the trimmed ordered extent
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 62b993fae54f..f189bf09ce6a 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -131,7 +131,7 @@ static void print_extent_item(const struct extent_buffer *eb, int slot, int type
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)(ei + 1);
btrfs_tree_block_key(eb, info, &key);
- pr_info("\t\ttree block key (%llu %u %llu) level %d\n",
+ pr_info("\t\ttree block key " BTRFS_KEY_FMT " level %d\n",
btrfs_disk_key_objectid(&key), key.type,
btrfs_disk_key_offset(&key),
btrfs_tree_block_level(eb, info));
@@ -277,9 +277,8 @@ static void print_dir_item(const struct extent_buffer *eb, int i)
struct btrfs_key location;
btrfs_dir_item_key_to_cpu(eb, di, &location);
- pr_info("\t\tlocation key (%llu %u %llu) type %d\n",
- location.objectid, location.type, location.offset,
- btrfs_dir_ftype(eb, di));
+ pr_info("\t\tlocation key " BTRFS_KEY_FMT " type %d\n",
+ BTRFS_KEY_FMT_VALUE(&location), btrfs_dir_ftype(eb, di));
pr_info("\t\ttransid %llu data_len %u name_len %u\n",
btrfs_dir_transid(eb, di), data_len, name_len);
di = (struct btrfs_dir_item *)((char *)di + len);
@@ -421,7 +420,7 @@ static void key_type_string(const struct btrfs_key *key, char *buf, int buf_size
if (key->type == 0 && key->objectid == BTRFS_FREE_SPACE_OBJECTID)
scnprintf(buf, buf_size, "UNTYPED");
else if (key_to_str[key->type])
- scnprintf(buf, buf_size, key_to_str[key->type]);
+ scnprintf(buf, buf_size, "%s", key_to_str[key->type]);
else
scnprintf(buf, buf_size, "UNKNOWN.%d", key->type);
}
@@ -598,10 +597,9 @@ void btrfs_print_tree(const struct extent_buffer *c, bool follow)
print_eb_refs_lock(c);
for (i = 0; i < nr; i++) {
btrfs_node_key_to_cpu(c, &key, i);
- pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n",
- i, key.objectid, key.type, key.offset,
- btrfs_node_blockptr(c, i),
- btrfs_node_ptr_generation(c, i));
+ pr_info("\tkey %d " BTRFS_KEY_FMT " block %llu gen %llu\n",
+ i, BTRFS_KEY_FMT_VALUE(&key), btrfs_node_blockptr(c, i),
+ btrfs_node_ptr_generation(c, i));
}
if (!follow)
return;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 31ad8580322a..9e2b53e90dcb 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -660,7 +660,7 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
{
int ret;
struct btrfs_root *quota_root = trans->fs_info->quota_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
path = btrfs_alloc_path();
@@ -672,7 +672,6 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
key.offset = dst;
ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
- btrfs_free_path(path);
return ret;
}
@@ -681,7 +680,7 @@ static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
{
int ret;
struct btrfs_root *quota_root = trans->fs_info->quota_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
path = btrfs_alloc_path();
@@ -694,24 +693,19 @@ static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
if (ret < 0)
- goto out;
+ return ret;
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
+ if (ret > 0)
+ return -ENOENT;
- ret = btrfs_del_item(trans, quota_root, path);
-out:
- btrfs_free_path(path);
- return ret;
+ return btrfs_del_item(trans, quota_root, path);
}
static int add_qgroup_item(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root, u64 qgroupid)
{
int ret;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_qgroup_info_item *qgroup_info;
struct btrfs_qgroup_limit_item *qgroup_limit;
struct extent_buffer *leaf;
@@ -737,7 +731,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*qgroup_info));
if (ret && ret != -EEXIST)
- goto out;
+ return ret;
leaf = path->nodes[0];
qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
@@ -754,7 +748,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*qgroup_limit));
if (ret && ret != -EEXIST)
- goto out;
+ return ret;
leaf = path->nodes[0];
qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
@@ -765,17 +759,14 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans,
btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
{
int ret;
struct btrfs_root *quota_root = trans->fs_info->quota_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
path = btrfs_alloc_path();
@@ -787,33 +778,27 @@ static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
key.offset = qgroupid;
ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
if (ret < 0)
- goto out;
+ return ret;
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
+ if (ret > 0)
+ return -ENOENT;
ret = btrfs_del_item(trans, quota_root, path);
if (ret)
- goto out;
+ return ret;
btrfs_release_path(path);
key.type = BTRFS_QGROUP_LIMIT_KEY;
ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
if (ret < 0)
- goto out;
+ return ret;
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
+ if (ret > 0)
+ return -ENOENT;
ret = btrfs_del_item(trans, quota_root, path);
-out:
- btrfs_free_path(path);
return ret;
}
@@ -821,7 +806,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
struct btrfs_qgroup *qgroup)
{
struct btrfs_root *quota_root = trans->fs_info->quota_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct extent_buffer *l;
struct btrfs_qgroup_limit_item *qgroup_limit;
@@ -841,7 +826,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
ret = -ENOENT;
if (ret)
- goto out;
+ return ret;
l = path->nodes[0];
slot = path->slots[0];
@@ -851,8 +836,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
-out:
- btrfs_free_path(path);
+
return ret;
}
@@ -861,7 +845,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root = fs_info->quota_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct extent_buffer *l;
struct btrfs_qgroup_info_item *qgroup_info;
@@ -884,7 +868,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
ret = -ENOENT;
if (ret)
- goto out;
+ return ret;
l = path->nodes[0];
slot = path->slots[0];
@@ -894,8 +878,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
-out:
- btrfs_free_path(path);
+
return ret;
}
@@ -903,7 +886,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root = fs_info->quota_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct extent_buffer *l;
struct btrfs_qgroup_status_item *ptr;
@@ -923,7 +906,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
ret = -ENOENT;
if (ret)
- goto out;
+ return ret;
l = path->nodes[0];
slot = path->slots[0];
@@ -933,8 +916,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
btrfs_set_qgroup_status_rescan(l, ptr,
fs_info->qgroup_rescan_progress.objectid);
-out:
- btrfs_free_path(path);
+
return ret;
}
@@ -944,7 +926,7 @@ out:
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct extent_buffer *leaf = NULL;
int ret;
@@ -961,7 +943,7 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
while (1) {
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
- goto out;
+ return ret;
leaf = path->nodes[0];
nr = btrfs_header_nritems(leaf);
if (!nr)
@@ -974,14 +956,12 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
path->slots[0] = 0;
ret = btrfs_del_items(trans, root, path, 0, nr);
if (ret)
- goto out;
+ return ret;
btrfs_release_path(path);
}
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
+
+ return 0;
}
int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
@@ -1263,7 +1243,14 @@ out:
btrfs_end_transaction(trans);
else if (trans)
ret = btrfs_end_transaction(trans);
- kfree(prealloc);
+
+ /*
+ * At this point we either failed to allocate prealloc, or we
+ * succeeded and passed its ownership to add_qgroup_rb(). In either
+ * case, this needs to be NULL or there is something wrong.
+ */
+ ASSERT(prealloc == NULL);
+
return ret;
}
@@ -1695,7 +1682,12 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
- kfree(prealloc);
+ /*
+ * At this point we either failed to allocate prealloc, or we
+ * succeeded and passed its ownership to add_qgroup_rb(). In either
+ * case, this needs to be NULL or there is something wrong.
+ */
+ ASSERT(prealloc == NULL);
return ret;
}
@@ -1707,8 +1699,7 @@ out:
static int can_delete_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup)
{
struct btrfs_key key;
- struct btrfs_path *path;
- int ret;
+ BTRFS_PATH_AUTO_FREE(path);
/*
* Squota would never be inconsistent, but there can still be case
@@ -1741,13 +1732,11 @@ static int can_delete_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup
if (!path)
return -ENOMEM;
- ret = btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL);
- btrfs_free_path(path);
/*
* The @ret from btrfs_find_root() exactly matches our definition for
* the return value, thus can be returned directly.
*/
- return ret;
+ return btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL);
}
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
@@ -2296,7 +2285,7 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
bool trace_leaf)
{
struct btrfs_key key;
- struct btrfs_path *src_path;
+ BTRFS_PATH_AUTO_FREE(src_path);
struct btrfs_fs_info *fs_info = trans->fs_info;
u32 nodesize = fs_info->nodesize;
int cur_level = root_level;
@@ -2308,10 +2297,8 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
return -EINVAL;
src_path = btrfs_alloc_path();
- if (!src_path) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!src_path)
+ return -ENOMEM;
if (dst_level)
btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
@@ -2337,10 +2324,8 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
parent_slot = src_path->slots[cur_level + 1];
eb = btrfs_read_node_slot(eb, parent_slot);
- if (IS_ERR(eb)) {
- ret = PTR_ERR(eb);
- goto out;
- }
+ if (IS_ERR(eb))
+ return PTR_ERR(eb);
src_path->nodes[cur_level] = eb;
@@ -2361,10 +2346,8 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
&src_key, src_path->slots[cur_level]);
}
/* Content mismatch, something went wrong */
- if (btrfs_comp_cpu_keys(&dst_key, &src_key)) {
- ret = -ENOENT;
- goto out;
- }
+ if (btrfs_comp_cpu_keys(&dst_key, &src_key))
+ return -ENOENT;
cur_level--;
}
@@ -2375,21 +2358,20 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start,
nodesize);
if (ret < 0)
- goto out;
+ return ret;
ret = btrfs_qgroup_trace_extent(trans, dst_path->nodes[dst_level]->start,
nodesize);
if (ret < 0)
- goto out;
+ return ret;
/* Record leaf file extents */
if (dst_level == 0 && trace_leaf) {
ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]);
if (ret < 0)
- goto out;
+ return ret;
ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]);
}
-out:
- btrfs_free_path(src_path);
+
return ret;
}
@@ -2590,7 +2572,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
int level;
u8 drop_subptree_thres;
struct extent_buffer *eb = root_eb;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
ASSERT(0 <= root_level && root_level < BTRFS_MAX_LEVEL);
ASSERT(root_eb != NULL);
@@ -2623,12 +2605,12 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
ret = btrfs_read_extent_buffer(root_eb, &check);
if (ret)
- goto out;
+ return ret;
}
if (root_level == 0) {
ret = btrfs_qgroup_trace_leaf_items(trans, root_eb);
- goto out;
+ return ret;
}
path = btrfs_alloc_path();
@@ -2664,10 +2646,8 @@ walk_down:
child_bytenr = btrfs_node_blockptr(eb, parent_slot);
eb = btrfs_read_node_slot(eb, parent_slot);
- if (IS_ERR(eb)) {
- ret = PTR_ERR(eb);
- goto out;
- }
+ if (IS_ERR(eb))
+ return PTR_ERR(eb);
path->nodes[level] = eb;
path->slots[level] = 0;
@@ -2678,14 +2658,14 @@ walk_down:
ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
fs_info->nodesize);
if (ret)
- goto out;
+ return ret;
}
if (level == 0) {
ret = btrfs_qgroup_trace_leaf_items(trans,
path->nodes[level]);
if (ret)
- goto out;
+ return ret;
/* Nonzero return here means we completed our search */
ret = adjust_slots_upwards(path, root_level);
@@ -2699,11 +2679,7 @@ walk_down:
level--;
}
- ret = 0;
-out:
- btrfs_free_path(path);
-
- return ret;
+ return 0;
}
static void qgroup_iterator_nested_add(struct list_head *head, struct btrfs_qgroup *qgroup)
@@ -3303,7 +3279,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
struct btrfs_root *quota_root;
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
- struct btrfs_qgroup *prealloc;
+ struct btrfs_qgroup *prealloc = NULL;
struct btrfs_qgroup_list **qlist_prealloc = NULL;
bool free_inherit = false;
bool need_rescan = false;
@@ -3544,7 +3520,14 @@ out:
}
if (free_inherit)
kfree(inherit);
- kfree(prealloc);
+
+ /*
+ * At this point we either failed to allocate prealloc, or we
+ * succeeded and passed its ownership to add_qgroup_rb(). In either
+ * case, this needs to be NULL or there is something wrong.
+ */
+ ASSERT(prealloc == NULL);
+
return ret;
}
@@ -3712,10 +3695,8 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
path, 1, 0);
btrfs_debug(fs_info,
- "current progress key (%llu %u %llu), search_slot ret %d",
- fs_info->qgroup_rescan_progress.objectid,
- fs_info->qgroup_rescan_progress.type,
- fs_info->qgroup_rescan_progress.offset, ret);
+ "current progress key " BTRFS_KEY_FMT ", search_slot ret %d",
+ BTRFS_KEY_FMT_VALUE(&fs_info->qgroup_rescan_progress), ret);
if (ret) {
/*
@@ -3817,8 +3798,8 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
* Rescan should only search for commit root, and any later difference
* should be recorded by qgroup
*/
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
while (!ret && !(stopped = rescan_should_stop(fs_info))) {
trans = btrfs_start_transaction(fs_info->fs_root, 0);
@@ -4796,7 +4777,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_tree_parent_check check = { 0 };
struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks;
- struct btrfs_qgroup_swapped_block *block;
+ struct btrfs_qgroup_swapped_block AUTO_KFREE(block);
struct extent_buffer *reloc_eb = NULL;
struct rb_node *node;
bool swapped = false;
@@ -4853,7 +4834,6 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb,
block->last_snapshot, block->trace_leaf);
free_out:
- kfree(block);
free_extent_buffer(reloc_eb);
out:
if (ret < 0) {
diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
index cc6f6095cc9f..2987cb7c686e 100644
--- a/fs/btrfs/raid-stripe-tree.c
+++ b/fs/btrfs/raid-stripe-tree.c
@@ -19,7 +19,7 @@ static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
u64 newlen, u64 frontpad)
{
struct btrfs_root *stripe_root = trans->fs_info->stripe_root;
- struct btrfs_stripe_extent *extent, *newitem;
+ struct btrfs_stripe_extent *extent, AUTO_KFREE(newitem);
struct extent_buffer *leaf;
int slot;
size_t item_size;
@@ -53,14 +53,10 @@ static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
ret = btrfs_del_item(trans, stripe_root, path);
if (ret)
- goto out;
+ return ret;
btrfs_release_path(path);
- ret = btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size);
-
-out:
- kfree(newitem);
- return ret;
+ return btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size);
}
int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
@@ -299,7 +295,7 @@ int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
struct btrfs_key stripe_key;
struct btrfs_root *stripe_root = fs_info->stripe_root;
const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
- struct btrfs_stripe_extent *stripe_extent;
+ struct btrfs_stripe_extent AUTO_KFREE(stripe_extent);
const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
int ret;
@@ -336,8 +332,6 @@ int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
}
- kfree(stripe_extent);
-
return ret;
}
@@ -394,8 +388,8 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
return -ENOMEM;
if (stripe->rst_search_commit_root) {
- path->skip_locking = 1;
- path->search_commit_root = 1;
+ path->skip_locking = true;
+ path->search_commit_root = true;
}
ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0135dceb7baa..f38d8305e46d 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -66,10 +66,10 @@ static void btrfs_dump_rbio(const struct btrfs_fs_info *fs_info,
dump_bioc(fs_info, rbio->bioc);
btrfs_crit(fs_info,
-"rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u scrubp=%u dbitmap=0x%lx",
+"rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u sector_nsteps=%u scrubp=%u dbitmap=0x%lx",
rbio->flags, rbio->nr_sectors, rbio->nr_data,
rbio->real_stripes, rbio->stripe_nsectors,
- rbio->scrubp, rbio->dbitmap);
+ rbio->sector_nsteps, rbio->scrubp, rbio->dbitmap);
}
#define ASSERT_RBIO(expr, rbio) \
@@ -134,18 +134,10 @@ struct btrfs_stripe_hash_table {
};
/*
- * A structure to present a sector inside a page, the length is fixed to
- * sectorsize;
+ * The PFN may still be valid, but our paddrs should always be block size
+ * aligned, thus a paddr of -1 is definitely not a valid one.
*/
-struct sector_ptr {
- /*
- * Blocks from the bio list can still be highmem.
- * So here we use physical address to present a page and the offset inside it.
- */
- phys_addr_t paddr;
- bool has_paddr;
- bool uptodate;
-};
+#define INVALID_PADDR (~(phys_addr_t)0)
static void rmw_rbio_work(struct work_struct *work);
static void rmw_rbio_work_locked(struct work_struct *work);
@@ -159,8 +151,8 @@ static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
{
bitmap_free(rbio->error_bitmap);
kfree(rbio->stripe_pages);
- kfree(rbio->bio_sectors);
- kfree(rbio->stripe_sectors);
+ kfree(rbio->bio_paddrs);
+ kfree(rbio->stripe_paddrs);
kfree(rbio->finish_pointers);
}
@@ -235,12 +227,22 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
return 0;
}
-static void memcpy_sectors(const struct sector_ptr *dst,
- const struct sector_ptr *src, u32 blocksize)
+static void memcpy_from_bio_to_stripe(struct btrfs_raid_bio *rbio, unsigned int sector_nr)
{
- memcpy_page(phys_to_page(dst->paddr), offset_in_page(dst->paddr),
- phys_to_page(src->paddr), offset_in_page(src->paddr),
- blocksize);
+ const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE);
+
+ ASSERT(sector_nr < rbio->nr_sectors);
+ for (int i = 0; i < rbio->sector_nsteps; i++) {
+ unsigned int index = sector_nr * rbio->sector_nsteps + i;
+ phys_addr_t dst = rbio->stripe_paddrs[index];
+ phys_addr_t src = rbio->bio_paddrs[index];
+
+ ASSERT(dst != INVALID_PADDR);
+ ASSERT(src != INVALID_PADDR);
+
+ memcpy_page(phys_to_page(dst), offset_in_page(dst),
+ phys_to_page(src), offset_in_page(src), step);
+ }
}
/*
@@ -263,20 +265,19 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
for (i = 0; i < rbio->nr_sectors; i++) {
/* Some range not covered by bio (partial write), skip it */
- if (!rbio->bio_sectors[i].has_paddr) {
+ if (rbio->bio_paddrs[i * rbio->sector_nsteps] == INVALID_PADDR) {
/*
* Even if the sector is not covered by bio, if it is
* a data sector it should still be uptodate as it is
* read from disk.
*/
if (i < rbio->nr_data * rbio->stripe_nsectors)
- ASSERT(rbio->stripe_sectors[i].uptodate);
+ ASSERT(test_bit(i, rbio->stripe_uptodate_bitmap));
continue;
}
- memcpy_sectors(&rbio->stripe_sectors[i], &rbio->bio_sectors[i],
- rbio->bioc->fs_info->sectorsize);
- rbio->stripe_sectors[i].uptodate = 1;
+ memcpy_from_bio_to_stripe(rbio, i);
+ set_bit(i, rbio->stripe_uptodate_bitmap);
}
set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
}
@@ -299,19 +300,48 @@ static int rbio_bucket(struct btrfs_raid_bio *rbio)
return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
}
-static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
- unsigned int page_nr)
+/* Get the sector number of the first sector covered by @page_nr. */
+static u32 page_nr_to_sector_nr(struct btrfs_raid_bio *rbio, unsigned int page_nr)
{
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- const u32 sectors_per_page = PAGE_SIZE / sectorsize;
+ u32 sector_nr;
+
+ ASSERT(page_nr < rbio->nr_pages);
+
+ sector_nr = (page_nr << PAGE_SHIFT) >> rbio->bioc->fs_info->sectorsize_bits;
+ ASSERT(sector_nr < rbio->nr_sectors);
+ return sector_nr;
+}
+
+/*
+ * Get the number of sectors covered by @page_nr.
+ *
+ * For bs > ps cases, the result will always be 1.
+ * For bs <= ps cases, the result will be ps / bs.
+ */
+static u32 page_nr_to_num_sectors(struct btrfs_raid_bio *rbio, unsigned int page_nr)
+{
+ struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+ u32 nr_sectors;
+
+ ASSERT(page_nr < rbio->nr_pages);
+
+ nr_sectors = round_up(PAGE_SIZE, fs_info->sectorsize) >> fs_info->sectorsize_bits;
+ ASSERT(nr_sectors > 0);
+ return nr_sectors;
+}
+
+static __maybe_unused bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
+ unsigned int page_nr)
+{
+ const u32 sector_nr = page_nr_to_sector_nr(rbio, page_nr);
+ const u32 nr_bits = page_nr_to_num_sectors(rbio, page_nr);
int i;
ASSERT(page_nr < rbio->nr_pages);
+ ASSERT(sector_nr + nr_bits < rbio->nr_sectors);
- for (i = sectors_per_page * page_nr;
- i < sectors_per_page * page_nr + sectors_per_page;
- i++) {
- if (!rbio->stripe_sectors[i].uptodate)
+ for (i = sector_nr; i < sector_nr + nr_bits; i++) {
+ if (!test_bit(i, rbio->stripe_uptodate_bitmap))
return false;
}
return true;
@@ -324,46 +354,44 @@ static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
*/
static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
{
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
+ const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE);
u32 offset;
int i;
- for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) {
+ for (i = 0, offset = 0; i < rbio->nr_sectors * rbio->sector_nsteps;
+ i++, offset += step) {
int page_index = offset >> PAGE_SHIFT;
ASSERT(page_index < rbio->nr_pages);
if (!rbio->stripe_pages[page_index])
continue;
- rbio->stripe_sectors[i].has_paddr = true;
- rbio->stripe_sectors[i].paddr =
- page_to_phys(rbio->stripe_pages[page_index]) +
- offset_in_page(offset);
+ rbio->stripe_paddrs[i] = page_to_phys(rbio->stripe_pages[page_index]) +
+ offset_in_page(offset);
}
}
static void steal_rbio_page(struct btrfs_raid_bio *src,
struct btrfs_raid_bio *dest, int page_nr)
{
- const u32 sectorsize = src->bioc->fs_info->sectorsize;
- const u32 sectors_per_page = PAGE_SIZE / sectorsize;
- int i;
+ const u32 sector_nr = page_nr_to_sector_nr(src, page_nr);
+ const u32 nr_bits = page_nr_to_num_sectors(src, page_nr);
+
+ ASSERT(page_nr < src->nr_pages);
+ ASSERT(sector_nr + nr_bits < src->nr_sectors);
if (dest->stripe_pages[page_nr])
__free_page(dest->stripe_pages[page_nr]);
dest->stripe_pages[page_nr] = src->stripe_pages[page_nr];
src->stripe_pages[page_nr] = NULL;
- /* Also update the sector->uptodate bits. */
- for (i = sectors_per_page * page_nr;
- i < sectors_per_page * page_nr + sectors_per_page; i++)
- dest->stripe_sectors[i].uptodate = true;
+ /* Also update the stripe_uptodate_bitmap bits. */
+ bitmap_set(dest->stripe_uptodate_bitmap, sector_nr, nr_bits);
}
static bool is_data_stripe_page(struct btrfs_raid_bio *rbio, int page_nr)
{
- const int sector_nr = (page_nr << PAGE_SHIFT) >>
- rbio->bioc->fs_info->sectorsize_bits;
+ const int sector_nr = page_nr_to_sector_nr(rbio, page_nr);
/*
* We have ensured PAGE_SIZE is aligned with sectorsize, thus
@@ -677,39 +705,62 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
return 1;
}
-static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio,
- unsigned int stripe_nr,
- unsigned int sector_nr)
+/* Return the sector index for @stripe_nr and @sector_nr. */
+static unsigned int rbio_sector_index(const struct btrfs_raid_bio *rbio,
+ unsigned int stripe_nr,
+ unsigned int sector_nr)
{
+ unsigned int ret;
+
ASSERT_RBIO_STRIPE(stripe_nr < rbio->real_stripes, rbio, stripe_nr);
ASSERT_RBIO_SECTOR(sector_nr < rbio->stripe_nsectors, rbio, sector_nr);
- return stripe_nr * rbio->stripe_nsectors + sector_nr;
+ ret = stripe_nr * rbio->stripe_nsectors + sector_nr;
+ ASSERT(ret < rbio->nr_sectors);
+ return ret;
+}
+
+/* Return the paddr array index for @stripe_nr, @sector_nr and @step_nr. */
+static unsigned int rbio_paddr_index(const struct btrfs_raid_bio *rbio,
+ unsigned int stripe_nr,
+ unsigned int sector_nr,
+ unsigned int step_nr)
+{
+ unsigned int ret;
+
+ ASSERT_RBIO_SECTOR(step_nr < rbio->sector_nsteps, rbio, step_nr);
+
+ ret = rbio_sector_index(rbio, stripe_nr, sector_nr) * rbio->sector_nsteps + step_nr;
+ ASSERT(ret < rbio->nr_sectors * rbio->sector_nsteps);
+ return ret;
}
-/* Return a sector from rbio->stripe_sectors, not from the bio list */
-static struct sector_ptr *rbio_stripe_sector(const struct btrfs_raid_bio *rbio,
- unsigned int stripe_nr,
- unsigned int sector_nr)
+static phys_addr_t rbio_stripe_paddr(const struct btrfs_raid_bio *rbio,
+ unsigned int stripe_nr, unsigned int sector_nr,
+ unsigned int step_nr)
{
- return &rbio->stripe_sectors[rbio_stripe_sector_index(rbio, stripe_nr,
- sector_nr)];
+ return rbio->stripe_paddrs[rbio_paddr_index(rbio, stripe_nr, sector_nr, step_nr)];
}
-/* Grab a sector inside P stripe */
-static struct sector_ptr *rbio_pstripe_sector(const struct btrfs_raid_bio *rbio,
- unsigned int sector_nr)
+static phys_addr_t rbio_pstripe_paddr(const struct btrfs_raid_bio *rbio,
+ unsigned int sector_nr, unsigned int step_nr)
{
- return rbio_stripe_sector(rbio, rbio->nr_data, sector_nr);
+ return rbio_stripe_paddr(rbio, rbio->nr_data, sector_nr, step_nr);
}
-/* Grab a sector inside Q stripe, return NULL if not RAID6 */
-static struct sector_ptr *rbio_qstripe_sector(const struct btrfs_raid_bio *rbio,
- unsigned int sector_nr)
+static phys_addr_t rbio_qstripe_paddr(const struct btrfs_raid_bio *rbio,
+ unsigned int sector_nr, unsigned int step_nr)
{
if (rbio->nr_data + 1 == rbio->real_stripes)
- return NULL;
- return rbio_stripe_sector(rbio, rbio->nr_data + 1, sector_nr);
+ return INVALID_PADDR;
+ return rbio_stripe_paddr(rbio, rbio->nr_data + 1, sector_nr, step_nr);
+}
+
+/* Return a paddr pointer into the rbio::stripe_paddrs[] for the specified sector. */
+static phys_addr_t *rbio_stripe_paddrs(const struct btrfs_raid_bio *rbio,
+ unsigned int stripe_nr, unsigned int sector_nr)
+{
+ return &rbio->stripe_paddrs[rbio_paddr_index(rbio, stripe_nr, sector_nr, 0)];
}
/*
@@ -944,7 +995,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t status)
}
/*
- * Get a sector pointer specified by its @stripe_nr and @sector_nr.
+ * Get paddr pointer for the sector specified by its @stripe_nr and @sector_nr.
*
* @rbio: The raid bio
* @stripe_nr: Stripe number, valid range [0, real_stripe)
@@ -954,34 +1005,52 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t status)
*
* The read/modify/write code wants to reuse the original bio page as much
* as possible, and only use stripe_sectors as fallback.
+ *
+ * Return NULL if bio_list_only is set but the specified sector has no
+ * corresponding bio.
*/
-static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
- int stripe_nr, int sector_nr,
- bool bio_list_only)
+static phys_addr_t *sector_paddrs_in_rbio(struct btrfs_raid_bio *rbio,
+ int stripe_nr, int sector_nr,
+ bool bio_list_only)
{
- struct sector_ptr *sector;
- int index;
+ phys_addr_t *ret = NULL;
+ const int index = rbio_paddr_index(rbio, stripe_nr, sector_nr, 0);
- ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->real_stripes,
- rbio, stripe_nr);
- ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
- rbio, sector_nr);
+ ASSERT(index >= 0 && index < rbio->nr_sectors * rbio->sector_nsteps);
- index = stripe_nr * rbio->stripe_nsectors + sector_nr;
- ASSERT(index >= 0 && index < rbio->nr_sectors);
-
- spin_lock(&rbio->bio_list_lock);
- sector = &rbio->bio_sectors[index];
- if (sector->has_paddr || bio_list_only) {
- /* Don't return sector without a valid page pointer */
- if (!sector->has_paddr)
- sector = NULL;
- spin_unlock(&rbio->bio_list_lock);
- return sector;
+ scoped_guard(spinlock, &rbio->bio_list_lock) {
+ if (rbio->bio_paddrs[index] != INVALID_PADDR || bio_list_only) {
+ /* Don't return sector without a valid page pointer */
+ if (rbio->bio_paddrs[index] != INVALID_PADDR)
+ ret = &rbio->bio_paddrs[index];
+ return ret;
+ }
}
- spin_unlock(&rbio->bio_list_lock);
+ return &rbio->stripe_paddrs[index];
+}
- return &rbio->stripe_sectors[index];
+/*
+ * Similar to sector_paddrs_in_rbio(), but with extra consideration for
+ * bs > ps cases, where we can have multiple steps for a fs block.
+ */
+static phys_addr_t sector_paddr_in_rbio(struct btrfs_raid_bio *rbio,
+ int stripe_nr, int sector_nr, int step_nr,
+ bool bio_list_only)
+{
+ phys_addr_t ret = INVALID_PADDR;
+ const int index = rbio_paddr_index(rbio, stripe_nr, sector_nr, step_nr);
+
+ ASSERT(index >= 0 && index < rbio->nr_sectors * rbio->sector_nsteps);
+
+ scoped_guard(spinlock, &rbio->bio_list_lock) {
+ if (rbio->bio_paddrs[index] != INVALID_PADDR || bio_list_only) {
+ /* Don't return sector without a valid page pointer */
+ if (rbio->bio_paddrs[index] != INVALID_PADDR)
+ ret = rbio->bio_paddrs[index];
+ return ret;
+ }
+ }
+ return rbio->stripe_paddrs[index];
}
/*
@@ -997,10 +1066,16 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
const unsigned int stripe_nsectors =
BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits;
const unsigned int num_sectors = stripe_nsectors * real_stripes;
+ const unsigned int step = min(fs_info->sectorsize, PAGE_SIZE);
+ const unsigned int sector_nsteps = fs_info->sectorsize / step;
struct btrfs_raid_bio *rbio;
- /* PAGE_SIZE must also be aligned to sectorsize for subpage support */
- ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize));
+ /*
+ * For bs <= ps cases, ps must be aligned to bs.
+ * For bs > ps cases, bs must be aligned to ps.
+ */
+ ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize) ||
+ IS_ALIGNED(fs_info->sectorsize, PAGE_SIZE));
/*
* Our current stripe len should be fixed to 64k thus stripe_nsectors
* (at most 16) should be no larger than BITS_PER_LONG.
@@ -1019,19 +1094,22 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
return ERR_PTR(-ENOMEM);
rbio->stripe_pages = kcalloc(num_pages, sizeof(struct page *),
GFP_NOFS);
- rbio->bio_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
- GFP_NOFS);
- rbio->stripe_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
- GFP_NOFS);
+ rbio->bio_paddrs = kcalloc(num_sectors * sector_nsteps, sizeof(phys_addr_t), GFP_NOFS);
+ rbio->stripe_paddrs = kcalloc(num_sectors * sector_nsteps, sizeof(phys_addr_t), GFP_NOFS);
rbio->finish_pointers = kcalloc(real_stripes, sizeof(void *), GFP_NOFS);
rbio->error_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS);
+ rbio->stripe_uptodate_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS);
- if (!rbio->stripe_pages || !rbio->bio_sectors || !rbio->stripe_sectors ||
- !rbio->finish_pointers || !rbio->error_bitmap) {
+ if (!rbio->stripe_pages || !rbio->bio_paddrs || !rbio->stripe_paddrs ||
+ !rbio->finish_pointers || !rbio->error_bitmap || !rbio->stripe_uptodate_bitmap) {
free_raid_bio_pointers(rbio);
kfree(rbio);
return ERR_PTR(-ENOMEM);
}
+ for (int i = 0; i < num_sectors * sector_nsteps; i++) {
+ rbio->stripe_paddrs[i] = INVALID_PADDR;
+ rbio->bio_paddrs[i] = INVALID_PADDR;
+ }
bio_list_init(&rbio->bio_list);
init_waitqueue_head(&rbio->io_wait);
@@ -1046,6 +1124,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
rbio->real_stripes = real_stripes;
rbio->stripe_npages = stripe_npages;
rbio->stripe_nsectors = stripe_nsectors;
+ rbio->sector_nsteps = sector_nsteps;
refcount_set(&rbio->refs, 1);
atomic_set(&rbio->stripes_pending, 0);
@@ -1090,8 +1169,8 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
* @faila and @failb will also be updated to the first and second stripe
* number of the errors.
*/
-static int get_rbio_veritical_errors(struct btrfs_raid_bio *rbio, int sector_nr,
- int *faila, int *failb)
+static int get_rbio_vertical_errors(struct btrfs_raid_bio *rbio, int sector_nr,
+ int *faila, int *failb)
{
int stripe_nr;
int found_errors = 0;
@@ -1123,20 +1202,41 @@ static int get_rbio_veritical_errors(struct btrfs_raid_bio *rbio, int sector_nr,
return found_errors;
}
+static int bio_add_paddrs(struct bio *bio, phys_addr_t *paddrs, unsigned int nr_steps,
+ unsigned int step)
+{
+ int added = 0;
+ int ret;
+
+ for (int i = 0; i < nr_steps; i++) {
+ ret = bio_add_page(bio, phys_to_page(paddrs[i]), step,
+ offset_in_page(paddrs[i]));
+ if (ret != step)
+ goto revert;
+ added += ret;
+ }
+ return added;
+revert:
+ /*
+ * We don't need to revert the bvec, as the bio will be submitted immediately.
+ * As long as the size is reduced, the extra bvec will not be accessed.
+ */
+ bio->bi_iter.bi_size -= added;
+ return 0;
+}
+
/*
* Add a single sector @sector into our list of bios for IO.
*
* Return 0 if everything went well.
- * Return <0 for error.
+ * Return <0 for error, and no byte will be added to @rbio.
*/
-static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
- struct bio_list *bio_list,
- struct sector_ptr *sector,
- unsigned int stripe_nr,
- unsigned int sector_nr,
- enum req_op op)
+static int rbio_add_io_paddrs(struct btrfs_raid_bio *rbio, struct bio_list *bio_list,
+ phys_addr_t *paddrs, unsigned int stripe_nr,
+ unsigned int sector_nr, enum req_op op)
{
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
+ const u32 step = min(sectorsize, PAGE_SIZE);
struct bio *last = bio_list->tail;
int ret;
struct bio *bio;
@@ -1152,7 +1252,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
rbio, stripe_nr);
ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
rbio, sector_nr);
- ASSERT(sector->has_paddr);
+ ASSERT(paddrs != NULL);
stripe = &rbio->bioc->stripes[stripe_nr];
disk_start = stripe->physical + sector_nr * sectorsize;
@@ -1165,8 +1265,8 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
rbio->error_bitmap);
/* Check if we have reached tolerance early. */
- found_errors = get_rbio_veritical_errors(rbio, sector_nr,
- NULL, NULL);
+ found_errors = get_rbio_vertical_errors(rbio, sector_nr,
+ NULL, NULL);
if (unlikely(found_errors > rbio->bioc->max_errors))
return -EIO;
return 0;
@@ -1183,8 +1283,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
*/
if (last_end == disk_start && !last->bi_status &&
last->bi_bdev == stripe->dev->bdev) {
- ret = bio_add_page(last, phys_to_page(sector->paddr),
- sectorsize, offset_in_page(sector->paddr));
+ ret = bio_add_paddrs(last, paddrs, rbio->sector_nsteps, step);
if (ret == sectorsize)
return 0;
}
@@ -1197,28 +1296,27 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
bio->bi_private = rbio;
- __bio_add_page(bio, phys_to_page(sector->paddr), sectorsize,
- offset_in_page(sector->paddr));
+ ret = bio_add_paddrs(bio, paddrs, rbio->sector_nsteps, step);
+ ASSERT(ret == sectorsize);
bio_list_add(bio_list, bio);
return 0;
}
static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
{
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- const u32 sectorsize_bits = rbio->bioc->fs_info->sectorsize_bits;
+ struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+ const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+ const u32 step_bits = min(fs_info->sectorsize_bits, PAGE_SHIFT);
struct bvec_iter iter = bio->bi_iter;
phys_addr_t paddr;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
rbio->bioc->full_stripe_logical;
- btrfs_bio_for_each_block(paddr, bio, &iter, sectorsize) {
- unsigned int index = (offset >> sectorsize_bits);
- struct sector_ptr *sector = &rbio->bio_sectors[index];
+ btrfs_bio_for_each_block(paddr, bio, &iter, step) {
+ unsigned int index = (offset >> step_bits);
- sector->has_paddr = true;
- sector->paddr = paddr;
- offset += sectorsize;
+ rbio->bio_paddrs[index] = paddr;
+ offset += step;
}
}
@@ -1296,56 +1394,64 @@ static void assert_rbio(struct btrfs_raid_bio *rbio)
ASSERT_RBIO(rbio->nr_data < rbio->real_stripes, rbio);
}
-static inline void *kmap_local_sector(const struct sector_ptr *sector)
+static inline void *kmap_local_paddr(phys_addr_t paddr)
{
/* The sector pointer must have a page mapped to it. */
- ASSERT(sector->has_paddr);
+ ASSERT(paddr != INVALID_PADDR);
- return kmap_local_page(phys_to_page(sector->paddr)) +
- offset_in_page(sector->paddr);
+ return kmap_local_page(phys_to_page(paddr)) + offset_in_page(paddr);
}
-/* Generate PQ for one vertical stripe. */
-static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
+static void generate_pq_vertical_step(struct btrfs_raid_bio *rbio, unsigned int sector_nr,
+ unsigned int step_nr)
{
void **pointers = rbio->finish_pointers;
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- struct sector_ptr *sector;
+ const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE);
int stripe;
const bool has_qstripe = rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6;
/* First collect one sector from each data stripe */
- for (stripe = 0; stripe < rbio->nr_data; stripe++) {
- sector = sector_in_rbio(rbio, stripe, sectornr, 0);
- pointers[stripe] = kmap_local_sector(sector);
- }
+ for (stripe = 0; stripe < rbio->nr_data; stripe++)
+ pointers[stripe] = kmap_local_paddr(
+ sector_paddr_in_rbio(rbio, stripe, sector_nr, step_nr, 0));
/* Then add the parity stripe */
- sector = rbio_pstripe_sector(rbio, sectornr);
- sector->uptodate = 1;
- pointers[stripe++] = kmap_local_sector(sector);
+ pointers[stripe++] = kmap_local_paddr(rbio_pstripe_paddr(rbio, sector_nr, step_nr));
if (has_qstripe) {
/*
* RAID6, add the qstripe and call the library function
* to fill in our p/q
*/
- sector = rbio_qstripe_sector(rbio, sectornr);
- sector->uptodate = 1;
- pointers[stripe++] = kmap_local_sector(sector);
+ pointers[stripe++] = kmap_local_paddr(
+ rbio_qstripe_paddr(rbio, sector_nr, step_nr));
assert_rbio(rbio);
- raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
- pointers);
+ raid6_call.gen_syndrome(rbio->real_stripes, step, pointers);
} else {
/* raid5 */
- memcpy(pointers[rbio->nr_data], pointers[0], sectorsize);
- run_xor(pointers + 1, rbio->nr_data - 1, sectorsize);
+ memcpy(pointers[rbio->nr_data], pointers[0], step);
+ run_xor(pointers + 1, rbio->nr_data - 1, step);
}
for (stripe = stripe - 1; stripe >= 0; stripe--)
kunmap_local(pointers[stripe]);
}
+/* Generate PQ for one vertical stripe. */
+static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
+{
+ const bool has_qstripe = (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6);
+
+ for (int i = 0; i < rbio->sector_nsteps; i++)
+ generate_pq_vertical_step(rbio, sectornr, i);
+
+ set_bit(rbio_sector_index(rbio, rbio->nr_data, sectornr),
+ rbio->stripe_uptodate_bitmap);
+ if (has_qstripe)
+ set_bit(rbio_sector_index(rbio, rbio->nr_data + 1, sectornr),
+ rbio->stripe_uptodate_bitmap);
+}
+
static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
struct bio_list *bio_list)
{
@@ -1372,7 +1478,7 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
*/
for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
stripe = total_sector_nr / rbio->stripe_nsectors;
sectornr = total_sector_nr % rbio->stripe_nsectors;
@@ -1382,14 +1488,14 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
continue;
if (stripe < rbio->nr_data) {
- sector = sector_in_rbio(rbio, stripe, sectornr, 1);
- if (!sector)
+ paddrs = sector_paddrs_in_rbio(rbio, stripe, sectornr, 1);
+ if (paddrs == NULL)
continue;
} else {
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
+ paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr);
}
- ret = rbio_add_io_sector(rbio, bio_list, sector, stripe,
+ ret = rbio_add_io_paddrs(rbio, bio_list, paddrs, stripe,
sectornr, REQ_OP_WRITE);
if (ret)
goto error;
@@ -1407,7 +1513,7 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
stripe = total_sector_nr / rbio->stripe_nsectors;
sectornr = total_sector_nr % rbio->stripe_nsectors;
@@ -1432,14 +1538,14 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
continue;
if (stripe < rbio->nr_data) {
- sector = sector_in_rbio(rbio, stripe, sectornr, 1);
- if (!sector)
+ paddrs = sector_paddrs_in_rbio(rbio, stripe, sectornr, 1);
+ if (paddrs == NULL)
continue;
} else {
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
+ paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr);
}
- ret = rbio_add_io_sector(rbio, bio_list, sector,
+ ret = rbio_add_io_paddrs(rbio, bio_list, paddrs,
rbio->real_stripes,
sectornr, REQ_OP_WRITE);
if (ret)
@@ -1487,21 +1593,17 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
}
/*
- * For subpage case, we can no longer set page Up-to-date directly for
- * stripe_pages[], thus we need to locate the sector.
+ * Return the sector number of the stripe sector whose first step is at @paddr,
+ * i.e. an index usable with rbio->stripe_uptodate_bitmap.
+ *
+ * Return -1 if not found.
*/
-static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
- phys_addr_t paddr)
+static int find_stripe_sector_nr(struct btrfs_raid_bio *rbio, phys_addr_t paddr)
{
- int i;
-
- for (i = 0; i < rbio->nr_sectors; i++) {
- struct sector_ptr *sector = &rbio->stripe_sectors[i];
-
- if (sector->has_paddr && sector->paddr == paddr)
- return sector;
+ for (int i = 0; i < rbio->nr_sectors; i++) {
+ if (rbio->stripe_paddrs[i * rbio->sector_nsteps] == paddr)
+ return i;
}
- return NULL;
+ return -1;
}
/*
@@ -1510,17 +1612,23 @@ static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
*/
static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
{
- const u32 blocksize = rbio->bioc->fs_info->sectorsize;
+ const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
+ const u32 step = min(sectorsize, PAGE_SIZE);
+ u32 offset = 0;
phys_addr_t paddr;
ASSERT(!bio_flagged(bio, BIO_CLONED));
- btrfs_bio_for_each_block_all(paddr, bio, blocksize) {
- struct sector_ptr *sector = find_stripe_sector(rbio, paddr);
+ btrfs_bio_for_each_block_all(paddr, bio, step) {
+ /* Hitting the first step of a sector. */
+ if (IS_ALIGNED(offset, sectorsize)) {
+ int sector_nr = find_stripe_sector_nr(rbio, paddr);
- ASSERT(sector);
- if (sector)
- sector->uptodate = 1;
+ ASSERT(sector_nr >= 0);
+ if (sector_nr >= 0)
+ set_bit(sector_nr, rbio->stripe_uptodate_bitmap);
+ }
+ offset += step;
}
}
@@ -1530,10 +1638,9 @@ static int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio)
int i;
for (i = 0; i < rbio->nr_sectors; i++) {
- if (rbio->stripe_sectors[i].paddr == bvec_paddr)
+ if (rbio->stripe_paddrs[i * rbio->sector_nsteps] == bvec_paddr)
break;
- if (rbio->bio_sectors[i].has_paddr &&
- rbio->bio_sectors[i].paddr == bvec_paddr)
+ if (rbio->bio_paddrs[i * rbio->sector_nsteps] == bvec_paddr)
break;
}
ASSERT(i < rbio->nr_sectors);
@@ -1566,7 +1673,11 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
struct bio *bio)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+ const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+ const u32 nr_steps = rbio->sector_nsteps;
int total_sector_nr = get_bio_sector_nr(rbio, bio);
+ u32 offset = 0;
+ phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
phys_addr_t paddr;
/* No data csum for the whole stripe, no need to verify. */
@@ -1577,18 +1688,24 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors)
return;
- btrfs_bio_for_each_block_all(paddr, bio, fs_info->sectorsize) {
+ btrfs_bio_for_each_block_all(paddr, bio, step) {
u8 csum_buf[BTRFS_CSUM_SIZE];
- u8 *expected_csum = rbio->csum_buf + total_sector_nr * fs_info->csum_size;
- int ret;
+ u8 *expected_csum;
+
+ paddrs[(offset / step) % nr_steps] = paddr;
+ offset += step;
+
+ /* Not yet covering the full fs block, continue to the next step. */
+ if (!IS_ALIGNED(offset, fs_info->sectorsize))
+ continue;
/* No csum for this sector, skip to the next sector. */
if (!test_bit(total_sector_nr, rbio->csum_bitmap))
continue;
- ret = btrfs_check_block_csum(fs_info, paddr,
- csum_buf, expected_csum);
- if (ret < 0)
+ expected_csum = rbio->csum_buf + total_sector_nr * fs_info->csum_size;
+ btrfs_calculate_block_csum_pages(fs_info, paddrs, csum_buf);
+ if (unlikely(memcmp(csum_buf, expected_csum, fs_info->csum_size) != 0))
set_bit(total_sector_nr, rbio->error_bitmap);
total_sector_nr++;
}
@@ -1785,10 +1902,9 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
int stripe_nr, int sector_nr)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
u8 csum_buf[BTRFS_CSUM_SIZE];
u8 *csum_expected;
- int ret;
if (!rbio->csum_bitmap || !rbio->csum_buf)
return 0;
@@ -1801,54 +1917,32 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
* bio list if possible.
*/
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
- sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
+ paddrs = sector_paddrs_in_rbio(rbio, stripe_nr, sector_nr, 0);
} else {
- sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
+ paddrs = rbio_stripe_paddrs(rbio, stripe_nr, sector_nr);
}
csum_expected = rbio->csum_buf +
(stripe_nr * rbio->stripe_nsectors + sector_nr) *
fs_info->csum_size;
- ret = btrfs_check_block_csum(fs_info, sector->paddr, csum_buf, csum_expected);
- return ret;
+ btrfs_calculate_block_csum_pages(fs_info, paddrs, csum_buf);
+ if (unlikely(memcmp(csum_buf, csum_expected, fs_info->csum_size) != 0))
+ return -EIO;
+ return 0;
}
-/*
- * Recover a vertical stripe specified by @sector_nr.
- * @*pointers are the pre-allocated pointers by the caller, so we don't
- * need to allocate/free the pointers again and again.
- */
-static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
- void **pointers, void **unmap_array)
+static void recover_vertical_step(struct btrfs_raid_bio *rbio,
+ unsigned int sector_nr,
+ unsigned int step_nr,
+ int faila, int failb,
+ void **pointers, void **unmap_array)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
- struct sector_ptr *sector;
- const u32 sectorsize = fs_info->sectorsize;
- int found_errors;
- int faila;
- int failb;
+ const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
int stripe_nr;
- int ret = 0;
- /*
- * Now we just use bitmap to mark the horizontal stripes in
- * which we have data when doing parity scrub.
- */
- if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
- !test_bit(sector_nr, &rbio->dbitmap))
- return 0;
-
- found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila,
- &failb);
- /*
- * No errors in the vertical stripe, skip it. Can happen for recovery
- * which only part of a stripe failed csum check.
- */
- if (!found_errors)
- return 0;
-
- if (unlikely(found_errors > rbio->bioc->max_errors))
- return -EIO;
+ ASSERT(step_nr < rbio->sector_nsteps);
+ ASSERT(sector_nr < rbio->stripe_nsectors);
/*
* Setup our array of pointers with sectors from each stripe
@@ -1857,16 +1951,18 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
* pointer order.
*/
for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) {
+ phys_addr_t paddr;
+
/*
* If we're rebuilding a read, we have to use pages from the
* bio list if possible.
*/
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
- sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
+ paddr = sector_paddr_in_rbio(rbio, stripe_nr, sector_nr, step_nr, 0);
} else {
- sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
+ paddr = rbio_stripe_paddr(rbio, stripe_nr, sector_nr, step_nr);
}
- pointers[stripe_nr] = kmap_local_sector(sector);
+ pointers[stripe_nr] = kmap_local_paddr(paddr);
unmap_array[stripe_nr] = pointers[stripe_nr];
}
@@ -1912,10 +2008,10 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
}
if (failb == rbio->real_stripes - 2) {
- raid6_datap_recov(rbio->real_stripes, sectorsize,
+ raid6_datap_recov(rbio->real_stripes, step,
faila, pointers);
} else {
- raid6_2data_recov(rbio->real_stripes, sectorsize,
+ raid6_2data_recov(rbio->real_stripes, step,
faila, failb, pointers);
}
} else {
@@ -1925,7 +2021,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
ASSERT(failb == -1);
pstripe:
/* Copy parity block into failed block to start with */
- memcpy(pointers[faila], pointers[rbio->nr_data], sectorsize);
+ memcpy(pointers[faila], pointers[rbio->nr_data], step);
/* Rearrange the pointer array */
p = pointers[faila];
@@ -1935,40 +2031,66 @@ pstripe:
pointers[rbio->nr_data - 1] = p;
/* Xor in the rest */
- run_xor(pointers, rbio->nr_data - 1, sectorsize);
-
+ run_xor(pointers, rbio->nr_data - 1, step);
}
+cleanup:
+ for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--)
+ kunmap_local(unmap_array[stripe_nr]);
+}
+
+/*
+ * Recover a vertical stripe specified by @sector_nr.
+ * @*pointers are the pre-allocated pointers by the caller, so we don't
+ * need to allocate/free the pointers again and again.
+ */
+static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
+ void **pointers, void **unmap_array)
+{
+ int found_errors;
+ int faila;
+ int failb;
+ int ret = 0;
+
/*
- * No matter if this is a RMW or recovery, we should have all
- * failed sectors repaired in the vertical stripe, thus they are now
- * uptodate.
- * Especially if we determine to cache the rbio, we need to
- * have at least all data sectors uptodate.
- *
- * If possible, also check if the repaired sector matches its data
- * checksum.
+ * Now we just use bitmap to mark the horizontal stripes in
+ * which we have data when doing parity scrub.
+ */
+ if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
+ !test_bit(sector_nr, &rbio->dbitmap))
+ return 0;
+
+ found_errors = get_rbio_vertical_errors(rbio, sector_nr, &faila,
+ &failb);
+ /*
+ * No errors in the vertical stripe, skip it. Can happen for recovery
+ * which only part of a stripe failed csum check.
*/
+ if (!found_errors)
+ return 0;
+
+ if (unlikely(found_errors > rbio->bioc->max_errors))
+ return -EIO;
+
+ for (int i = 0; i < rbio->sector_nsteps; i++)
+ recover_vertical_step(rbio, sector_nr, i, faila, failb,
+ pointers, unmap_array);
if (faila >= 0) {
ret = verify_one_sector(rbio, faila, sector_nr);
if (ret < 0)
- goto cleanup;
+ return ret;
- sector = rbio_stripe_sector(rbio, faila, sector_nr);
- sector->uptodate = 1;
+ set_bit(rbio_sector_index(rbio, faila, sector_nr),
+ rbio->stripe_uptodate_bitmap);
}
if (failb >= 0) {
ret = verify_one_sector(rbio, failb, sector_nr);
if (ret < 0)
- goto cleanup;
+ return ret;
- sector = rbio_stripe_sector(rbio, failb, sector_nr);
- sector->uptodate = 1;
+ set_bit(rbio_sector_index(rbio, failb, sector_nr),
+ rbio->stripe_uptodate_bitmap);
}
-
-cleanup:
- for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--)
- kunmap_local(unmap_array[stripe_nr]);
return ret;
}
@@ -2043,7 +2165,7 @@ static void recover_rbio(struct btrfs_raid_bio *rbio)
total_sector_nr++) {
int stripe = total_sector_nr / rbio->stripe_nsectors;
int sectornr = total_sector_nr % rbio->stripe_nsectors;
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
/*
* Skip the range which has error. It can be a range which is
@@ -2060,8 +2182,8 @@ static void recover_rbio(struct btrfs_raid_bio *rbio)
continue;
}
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
- ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
+ paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr);
+ ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, stripe,
sectornr, REQ_OP_READ);
if (ret < 0) {
bio_list_put(&bio_list);
@@ -2106,7 +2228,7 @@ static void set_rbio_raid6_extra_error(struct btrfs_raid_bio *rbio, int mirror_n
int faila;
int failb;
- found_errors = get_rbio_veritical_errors(rbio, sector_nr,
+ found_errors = get_rbio_vertical_errors(rbio, sector_nr,
&faila, &failb);
/* This vertical stripe doesn't have errors. */
if (!found_errors)
@@ -2250,13 +2372,13 @@ static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio)
*/
for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
- struct sector_ptr *sector;
int stripe = total_sector_nr / rbio->stripe_nsectors;
int sectornr = total_sector_nr % rbio->stripe_nsectors;
+ phys_addr_t *paddrs;
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
- ret = rbio_add_io_sector(rbio, &bio_list, sector,
- stripe, sectornr, REQ_OP_READ);
+ paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr);
+ ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, stripe,
+ sectornr, REQ_OP_READ);
if (ret) {
bio_list_put(&bio_list);
return ret;
@@ -2310,14 +2432,15 @@ static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
int i;
for (i = 0; i < rbio->nr_data * rbio->stripe_nsectors; i++) {
- struct sector_ptr *sector = &rbio->stripe_sectors[i];
+ phys_addr_t paddr = rbio->stripe_paddrs[i * rbio->sector_nsteps];
/*
* We have a sector which doesn't have page nor uptodate,
* thus this rbio can not be cached one, as cached one must
* have all its data sectors present and uptodate.
*/
- if (!sector->has_paddr || !sector->uptodate)
+ if (paddr == INVALID_PADDR ||
+ !test_bit(i, rbio->stripe_uptodate_bitmap))
return true;
}
return false;
@@ -2398,7 +2521,7 @@ static void rmw_rbio(struct btrfs_raid_bio *rbio)
for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
int found_errors;
- found_errors = get_rbio_veritical_errors(rbio, sectornr, NULL, NULL);
+ found_errors = get_rbio_vertical_errors(rbio, sectornr, NULL, NULL);
if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
break;
@@ -2469,47 +2592,121 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
return rbio;
}
+static int alloc_rbio_sector_pages(struct btrfs_raid_bio *rbio,
+ int sector_nr)
+{
+ const u32 step = min(PAGE_SIZE, rbio->bioc->fs_info->sectorsize);
+ const u32 base = sector_nr * rbio->sector_nsteps;
+
+ for (int i = base; i < base + rbio->sector_nsteps; i++) {
+ const unsigned int page_index = (i * step) >> PAGE_SHIFT;
+ struct page *page;
+
+ if (rbio->stripe_pages[page_index])
+ continue;
+ page = alloc_page(GFP_NOFS);
+ if (!page)
+ return -ENOMEM;
+ rbio->stripe_pages[page_index] = page;
+ }
+ return 0;
+}
+
/*
* We just scrub the parity that we have correct data on the same horizontal,
* so we needn't allocate all pages for all the stripes.
*/
static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
{
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
int total_sector_nr;
for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
- struct page *page;
int sectornr = total_sector_nr % rbio->stripe_nsectors;
- int index = (total_sector_nr * sectorsize) >> PAGE_SHIFT;
+ int ret;
if (!test_bit(sectornr, &rbio->dbitmap))
continue;
- if (rbio->stripe_pages[index])
- continue;
- page = alloc_page(GFP_NOFS);
- if (!page)
- return -ENOMEM;
- rbio->stripe_pages[index] = page;
+ ret = alloc_rbio_sector_pages(rbio, total_sector_nr);
+ if (ret < 0)
+ return ret;
}
index_stripe_sectors(rbio);
return 0;
}
+/* Return true if the content of the step matches the calculated one. */
+static bool verify_one_parity_step(struct btrfs_raid_bio *rbio,
+ void *pointers[], unsigned int sector_nr,
+ unsigned int step_nr)
+{
+ const unsigned int nr_data = rbio->nr_data;
+ const bool has_qstripe = (rbio->real_stripes - rbio->nr_data == 2);
+ const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE);
+ void *parity;
+ bool ret = false;
+
+ ASSERT(step_nr < rbio->sector_nsteps);
+
+ /* First collect one page from each data stripe. */
+ for (int stripe = 0; stripe < nr_data; stripe++)
+ pointers[stripe] = kmap_local_paddr(
+ sector_paddr_in_rbio(rbio, stripe, sector_nr,
+ step_nr, 0));
+
+ if (has_qstripe) {
+ assert_rbio(rbio);
+ /* RAID6, call the library function to fill in our P/Q. */
+ raid6_call.gen_syndrome(rbio->real_stripes, step, pointers);
+ } else {
+ /* RAID5. */
+ memcpy(pointers[nr_data], pointers[0], step);
+ run_xor(pointers + 1, nr_data - 1, step);
+ }
+
+ /* Check scrubbing parity and repair it. */
+ parity = kmap_local_paddr(rbio_stripe_paddr(rbio, rbio->scrubp, sector_nr, step_nr));
+ if (memcmp(parity, pointers[rbio->scrubp], step) != 0)
+ memcpy(parity, pointers[rbio->scrubp], step);
+ else
+ ret = true;
+ kunmap_local(parity);
+
+ for (int stripe = nr_data - 1; stripe >= 0; stripe--)
+ kunmap_local(pointers[stripe]);
+ return ret;
+}
+
+/*
+ * The @pointers array should have the P/Q parity already mapped.
+ */
+static void verify_one_parity_sector(struct btrfs_raid_bio *rbio,
+ void *pointers[], unsigned int sector_nr)
+{
+ bool found_error = false;
+
+ for (int step_nr = 0; step_nr < rbio->sector_nsteps; step_nr++) {
+ bool match;
+
+ match = verify_one_parity_step(rbio, pointers, sector_nr, step_nr);
+ if (!match)
+ found_error = true;
+ }
+ if (!found_error)
+ bitmap_clear(&rbio->dbitmap, sector_nr, 1);
+}
+
static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
{
struct btrfs_io_context *bioc = rbio->bioc;
- const u32 sectorsize = bioc->fs_info->sectorsize;
void **pointers = rbio->finish_pointers;
unsigned long *pbitmap = &rbio->finish_pbitmap;
int nr_data = rbio->nr_data;
- int stripe;
int sectornr;
bool has_qstripe;
struct page *page;
- struct sector_ptr p_sector = { 0 };
- struct sector_ptr q_sector = { 0 };
+ phys_addr_t p_paddr = INVALID_PADDR;
+ phys_addr_t q_paddr = INVALID_PADDR;
struct bio_list bio_list;
int is_replace = 0;
int ret;
@@ -2542,72 +2739,36 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
page = alloc_page(GFP_NOFS);
if (!page)
return -ENOMEM;
- p_sector.has_paddr = true;
- p_sector.paddr = page_to_phys(page);
- p_sector.uptodate = 1;
+ p_paddr = page_to_phys(page);
page = NULL;
+ pointers[nr_data] = kmap_local_paddr(p_paddr);
if (has_qstripe) {
/* RAID6, allocate and map temp space for the Q stripe */
page = alloc_page(GFP_NOFS);
if (!page) {
- __free_page(phys_to_page(p_sector.paddr));
- p_sector.has_paddr = false;
+ __free_page(phys_to_page(p_paddr));
+ p_paddr = INVALID_PADDR;
return -ENOMEM;
}
- q_sector.has_paddr = true;
- q_sector.paddr = page_to_phys(page);
- q_sector.uptodate = 1;
+ q_paddr = page_to_phys(page);
page = NULL;
- pointers[rbio->real_stripes - 1] = kmap_local_sector(&q_sector);
+ pointers[rbio->real_stripes - 1] = kmap_local_paddr(q_paddr);
}
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
/* Map the parity stripe just once */
- pointers[nr_data] = kmap_local_sector(&p_sector);
-
- for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
- struct sector_ptr *sector;
- void *parity;
-
- /* first collect one page from each data stripe */
- for (stripe = 0; stripe < nr_data; stripe++) {
- sector = sector_in_rbio(rbio, stripe, sectornr, 0);
- pointers[stripe] = kmap_local_sector(sector);
- }
- if (has_qstripe) {
- assert_rbio(rbio);
- /* RAID6, call the library function to fill in our P/Q */
- raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
- pointers);
- } else {
- /* raid5 */
- memcpy(pointers[nr_data], pointers[0], sectorsize);
- run_xor(pointers + 1, nr_data - 1, sectorsize);
- }
-
- /* Check scrubbing parity and repair it */
- sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
- parity = kmap_local_sector(sector);
- if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0)
- memcpy(parity, pointers[rbio->scrubp], sectorsize);
- else
- /* Parity is right, needn't writeback */
- bitmap_clear(&rbio->dbitmap, sectornr, 1);
- kunmap_local(parity);
-
- for (stripe = nr_data - 1; stripe >= 0; stripe--)
- kunmap_local(pointers[stripe]);
- }
+ for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors)
+ verify_one_parity_sector(rbio, pointers, sectornr);
kunmap_local(pointers[nr_data]);
- __free_page(phys_to_page(p_sector.paddr));
- p_sector.has_paddr = false;
- if (q_sector.has_paddr) {
- __free_page(phys_to_page(q_sector.paddr));
- q_sector.has_paddr = false;
+ __free_page(phys_to_page(p_paddr));
+ p_paddr = INVALID_PADDR;
+ if (q_paddr != INVALID_PADDR) {
+ __free_page(phys_to_page(q_paddr));
+ q_paddr = INVALID_PADDR;
}
/*
@@ -2616,10 +2777,10 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
* everything else.
*/
for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
- sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
- ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->scrubp,
+ paddrs = rbio_stripe_paddrs(rbio, rbio->scrubp, sectornr);
+ ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, rbio->scrubp,
sectornr, REQ_OP_WRITE);
if (ret)
goto cleanup;
@@ -2634,11 +2795,10 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
*/
ASSERT_RBIO(rbio->bioc->replace_stripe_src >= 0, rbio);
for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) {
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
- sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
- ret = rbio_add_io_sector(rbio, &bio_list, sector,
- rbio->real_stripes,
+ paddrs = rbio_stripe_paddrs(rbio, rbio->scrubp, sectornr);
+ ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, rbio->real_stripes,
sectornr, REQ_OP_WRITE);
if (ret)
goto cleanup;
@@ -2686,7 +2846,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
int failb;
int found_errors;
- found_errors = get_rbio_veritical_errors(rbio, sector_nr,
+ found_errors = get_rbio_vertical_errors(rbio, sector_nr,
&faila, &failb);
if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
@@ -2755,7 +2915,7 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
total_sector_nr++) {
int sectornr = total_sector_nr % rbio->stripe_nsectors;
int stripe = total_sector_nr / rbio->stripe_nsectors;
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
/* No data in the vertical stripe, no need to read. */
if (!test_bit(sectornr, &rbio->dbitmap))
@@ -2763,22 +2923,23 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
/*
* We want to find all the sectors missing from the rbio and
- * read them from the disk. If sector_in_rbio() finds a sector
+ * read them from the disk. If sector_paddrs_in_rbio() finds a sector
* in the bio list we don't need to read it off the stripe.
*/
- sector = sector_in_rbio(rbio, stripe, sectornr, 1);
- if (sector)
+ paddrs = sector_paddrs_in_rbio(rbio, stripe, sectornr, 1);
+ if (paddrs == NULL)
continue;
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
+ paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr);
/*
* The bio cache may have handed us an uptodate sector. If so,
* use it.
*/
- if (sector->uptodate)
+ if (test_bit(rbio_sector_index(rbio, stripe, sectornr),
+ rbio->stripe_uptodate_bitmap))
continue;
- ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
+ ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, stripe,
sectornr, REQ_OP_READ);
if (ret) {
bio_list_put(&bio_list);
@@ -2819,7 +2980,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio)
for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
int found_errors;
- found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL);
+ found_errors = get_rbio_vertical_errors(rbio, sector_nr, NULL, NULL);
if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
break;
@@ -2857,9 +3018,6 @@ void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
unsigned int foffset = 0;
int ret;
- /* We shouldn't hit RAID56 for bs > ps cases for now. */
- ASSERT(fs_info->sectorsize <= PAGE_SIZE);
-
/*
* If we hit ENOMEM temporarily, but later at
* raid56_parity_submit_scrub_rbio() time it succeeded, we just do
@@ -2893,8 +3051,7 @@ void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
foffset = 0;
}
}
- for (unsigned int sector_nr = offset_in_full_stripe >> fs_info->sectorsize_bits;
- sector_nr < (offset_in_full_stripe + BTRFS_STRIPE_LEN) >> fs_info->sectorsize_bits;
- sector_nr++)
- rbio->stripe_sectors[sector_nr].uptodate = true;
+ bitmap_set(rbio->stripe_uptodate_bitmap,
+ offset_in_full_stripe >> fs_info->sectorsize_bits,
+ BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
}
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 84c4d1d29c7a..1f463ecf7e41 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -16,7 +16,6 @@
#include "volumes.h"
struct page;
-struct sector_ptr;
struct btrfs_fs_info;
enum btrfs_rbio_ops {
@@ -25,6 +24,84 @@ enum btrfs_rbio_ops {
BTRFS_RBIO_PARITY_SCRUB,
};
+/*
+ * Overview of btrfs_raid_bio.
+ *
+ * One btrfs_raid_bio represents a full stripe of RAID56, including both data
+ * and P/Q stripes. For now, each data and P/Q stripe is of a fixed length (64K).
+ *
+ * One btrfs_raid_bio can have one or more bios from higher layer, covering
+ * part or all of the data stripes.
+ *
+ * [PAGES FROM HIGHER LAYER BIOS]
+ * Higher layer bios are in the btrfs_raid_bio::bio_list.
+ *
+ * Pages from the bio_list are represented like the following:
+ *
+ * bio_list: |<- Bio 1 ->| |<- Bio 2 ->| ...
+ * bio_paddrs: [0] [1] [2] [3] [4] [5] ...
+ *
+ * If there is a bio covering a sector (one btrfs fs block), the corresponding
+ * pointer in btrfs_raid_bio::bio_paddrs[] will point to the physical address
+ * (with the offset inside the page) of the corresponding bio.
+ *
+ * If there is no bio covering a sector, then btrfs_raid_bio::bio_paddrs[i] will
+ * be INVALID_PADDR.
+ *
+ * The length of each entry in bio_paddrs[] is a step (aka, min(sectorsize, PAGE_SIZE)).
+ *
+ * [PAGES FOR INTERNAL USAGES]
+ * Pages not covered by any bio or belonging to P/Q stripes are stored in
+ * btrfs_raid_bio::stripe_pages[] and stripe_paddrs[], like the following:
+ *
+ * stripe_pages: |<- Page 0 ->|<- Page 1 ->| ...
+ * stripe_paddrs: [0] [1] [2] [3] [4] ...
+ *
+ * stripe_pages[] array stores all the pages covering the full stripe, including
+ * data and P/Q pages.
+ * stripe_pages[0] is the first page of the first data stripe.
+ * stripe_pages[BTRFS_STRIPE_LEN / PAGE_SIZE] is the first page of the second
+ * data stripe.
+ *
+ * Some pointers inside stripe_pages[] can be NULL, e.g. for a full stripe write
+ * (the bio covers all data stripes) there is no need to allocate pages for
+ * data stripes (can grab from bio_paddrs[]).
+ *
+ * If the corresponding page of stripe_paddrs[i] is not allocated, the value of
+ * stripe_paddrs[i] will be INVALID_PADDR.
+ *
+ * The length of each entry in stripe_paddrs[] is a step.
+ *
+ * [LOCATING A SECTOR]
+ * To locate a sector for IO, we need the following info:
+ *
+ * - stripe_nr
+ * Starts from 0 (representing the first data stripe), ends at
+ * @nr_data (RAID5, P stripe) or @nr_data + 1 (RAID6, Q stripe).
+ *
+ * - sector_nr
+ * Starts from 0 (representing the first sector of the stripe), ends
+ * at BTRFS_STRIPE_LEN / sectorsize - 1.
+ *
+ * - step_nr
+ * A step is min(sectorsize, PAGE_SIZE).
+ *
+ * Starts from 0 (representing the first step of the sector), ends
+ * at @sector_nsteps - 1.
+ *
+ * Most call sites do not need to care about this parameter.
+ * It is for bs > ps support and only for vertical stripe related works.
+ * (e.g. RMW/recover)
+ *
+ * - from which array
+ * Whether grabbing from stripe_paddrs[] (aka, internal pages) or from the
+ * bio_paddrs[] (aka, from the higher layer bios).
+ *
+ * For IO, a physical address is returned, so that we can extract the page and
+ * the offset inside the page for IO.
+ * The special value INVALID_PADDR indicates that the physical address is invalid,
+ * normally meaning there is no page allocated for the specified sector.
+ */
struct btrfs_raid_bio {
struct btrfs_io_context *bioc;
@@ -82,6 +159,14 @@ struct btrfs_raid_bio {
/* How many sectors there are for each stripe */
u8 stripe_nsectors;
+ /*
+ * How many steps there are for one sector.
+ *
+ * For bs > ps cases, it's sectorsize / PAGE_SIZE.
+ * For bs <= ps cases, it's always 1.
+ */
+ u8 sector_nsteps;
+
/* Stripe number that we're scrubbing */
u8 scrubp;
@@ -116,13 +201,13 @@ struct btrfs_raid_bio {
struct page **stripe_pages;
/* Pointers to the sectors in the bio_list, for faster lookup */
- struct sector_ptr *bio_sectors;
+ phys_addr_t *bio_paddrs;
- /*
- * For subpage support, we need to map each sector to above
- * stripe_pages.
- */
- struct sector_ptr *stripe_sectors;
+ /* Pointers to the sectors in the stripe_pages[]. */
+ phys_addr_t *stripe_paddrs;
+
+ /* Each set bit means the corresponding sector in stripe_paddrs[] is uptodate. */
+ unsigned long *stripe_uptodate_bitmap;
/* Allocated with real_stripes-many pointers for finish_*() calls */
void **finish_pointers;
@@ -131,10 +216,6 @@ struct btrfs_raid_bio {
* The bitmap recording where IO errors happened.
* Each bit is corresponding to one sector in either bio_sectors[] or
* stripe_sectors[] array.
- *
- * The reason we don't use another bit in sector_ptr is, we have two
- * arrays of sectors, and a lot of IO can use sectors in both arrays.
- * Thus making it much harder to iterate.
*/
unsigned long *error_bitmap;
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 5465a5eae9b2..b5fe95baf92e 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/blkdev.h>
+#include <linux/fscrypt.h>
#include <linux/iversion.h>
#include "ctree.h"
#include "fs.h"
@@ -343,7 +344,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_trans_handle *trans;
- char *buf = NULL;
+ char AUTO_KVFREE(buf);
struct btrfs_key key;
u32 nritems;
int slot;
@@ -358,10 +359,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
return ret;
path = btrfs_alloc_path();
- if (!path) {
- kvfree(buf);
+ if (!path)
return ret;
- }
path->reada = READA_FORWARD;
/* Clone data */
@@ -611,7 +610,6 @@ process_slot:
}
out:
- kvfree(buf);
clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags);
return ret;
@@ -792,6 +790,10 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
ASSERT(inode_in->vfs_inode.i_sb == inode_out->vfs_inode.i_sb);
}
+ /* Can only reflink encrypted files if both files are encrypted. */
+ if (IS_ENCRYPTED(&inode_in->vfs_inode) != IS_ENCRYPTED(&inode_out->vfs_inode))
+ return -EINVAL;
+
/* Don't make the dst file partly checksummed */
if ((inode_in->flags & BTRFS_INODE_NODATASUM) !=
(inode_out->flags & BTRFS_INODE_NODATASUM)) {
@@ -868,6 +870,9 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
bool same_inode = dst_inode == src_inode;
int ret;
+ if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(src_file)))))
+ return -EIO;
+
if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
return -EINVAL;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0765e06d00b8..5bfefc3e9c06 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -511,7 +511,7 @@ static void __del_reloc_root(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct rb_node *rb_node;
- struct mapping_node *node = NULL;
+ struct mapping_node AUTO_KFREE(node);
struct reloc_control *rc = fs_info->reloc_ctl;
bool put_ref = false;
@@ -544,7 +544,6 @@ static void __del_reloc_root(struct btrfs_root *root)
spin_unlock(&fs_info->trans_lock);
if (put_ref)
btrfs_put_root(root);
- kfree(node);
}
/*
@@ -586,10 +585,9 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *reloc_root;
struct extent_buffer *eb;
- struct btrfs_root_item *root_item;
+ struct btrfs_root_item AUTO_KFREE(root_item);
struct btrfs_key root_key;
int ret = 0;
- bool must_abort = false;
root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
if (!root_item)
@@ -615,17 +613,16 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
btrfs_disk_key_to_cpu(&cpu_key, &root->root_item.drop_progress);
btrfs_err(fs_info,
- "cannot relocate partially dropped subvolume %llu, drop progress key (%llu %u %llu)",
- objectid, cpu_key.objectid, cpu_key.type, cpu_key.offset);
- ret = -EUCLEAN;
- goto fail;
+ "cannot relocate partially dropped subvolume %llu, drop progress key " BTRFS_KEY_FMT,
+ objectid, BTRFS_KEY_FMT_VALUE(&cpu_key));
+ return ERR_PTR(-EUCLEAN);
}
/* called by btrfs_init_reloc_root */
ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
BTRFS_TREE_RELOC_OBJECTID);
if (ret)
- goto fail;
+ return ERR_PTR(ret);
/*
* Set the last_snapshot field to the generation of the commit
@@ -648,14 +645,13 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
ret = btrfs_copy_root(trans, root, root->node, &eb,
BTRFS_TREE_RELOC_OBJECTID);
if (ret)
- goto fail;
+ return ERR_PTR(ret);
}
/*
* We have changed references at this point, we must abort the
- * transaction if anything fails.
+ * transaction if anything fails (i.e. 'goto abort').
*/
- must_abort = true;
memcpy(root_item, &root->root_item, sizeof(*root_item));
btrfs_set_root_bytenr(root_item, eb->start);
@@ -675,9 +671,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
ret = btrfs_insert_root(trans, fs_info->tree_root,
&root_key, root_item);
if (ret)
- goto fail;
-
- kfree(root_item);
+ goto abort;
reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
if (IS_ERR(reloc_root)) {
@@ -687,11 +681,9 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
btrfs_set_root_last_trans(reloc_root, trans->transid);
return reloc_root;
-fail:
- kfree(root_item);
+
abort:
- if (must_abort)
- btrfs_abort_transaction(trans, ret);
+ btrfs_abort_transaction(trans, ret);
return ERR_PTR(ret);
}
@@ -2947,7 +2939,7 @@ static int relocate_file_extent_cluster(struct reloc_control *rc)
const struct file_extent_cluster *cluster = &rc->cluster;
u64 offset = BTRFS_I(inode)->reloc_block_group_start;
u64 cur_file_offset = cluster->start - offset;
- struct file_ra_state *ra;
+ struct file_ra_state AUTO_KFREE(ra);
int cluster_nr = 0;
int ret = 0;
@@ -2960,13 +2952,13 @@ static int relocate_file_extent_cluster(struct reloc_control *rc)
ret = prealloc_file_extent_cluster(rc);
if (ret)
- goto out;
+ return ret;
file_ra_state_init(ra, inode->i_mapping);
ret = setup_relocation_extent_mapping(rc);
if (ret)
- goto out;
+ return ret;
while (cur_file_offset < cluster->end - offset) {
ret = relocate_one_folio(rc, ra, &cluster_nr, &cur_file_offset);
@@ -2975,8 +2967,6 @@ static int relocate_file_extent_cluster(struct reloc_control *rc)
}
if (ret == 0)
WARN_ON(cluster_nr != cluster->nr);
-out:
- kfree(ra);
return ret;
}
@@ -3175,8 +3165,8 @@ again:
key.offset = blocksize;
}
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
@@ -3368,8 +3358,8 @@ int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = 0;
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
0, 0);
if (ret < 0)
@@ -3882,8 +3872,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start,
struct inode *inode;
struct btrfs_path *path;
int ret;
- int rw = 0;
- int err = 0;
+ bool bg_is_ro = false;
/*
* This only gets set if we had a half-deleted snapshot on mount. We
@@ -3925,24 +3914,20 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start,
}
ret = reloc_chunk_start(fs_info);
- if (ret < 0) {
- err = ret;
+ if (ret < 0)
goto out_put_bg;
- }
rc->extent_root = extent_root;
rc->block_group = bg;
ret = btrfs_inc_block_group_ro(rc->block_group, true);
- if (ret) {
- err = ret;
+ if (ret)
goto out;
- }
- rw = 1;
+ bg_is_ro = true;
path = btrfs_alloc_path();
if (!path) {
- err = -ENOMEM;
+ ret = -ENOMEM;
goto out;
}
@@ -3954,14 +3939,12 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start,
else
ret = PTR_ERR(inode);
- if (ret && ret != -ENOENT) {
- err = ret;
+ if (ret && ret != -ENOENT)
goto out;
- }
rc->data_inode = create_reloc_inode(rc->block_group);
if (IS_ERR(rc->data_inode)) {
- err = PTR_ERR(rc->data_inode);
+ ret = PTR_ERR(rc->data_inode);
rc->data_inode = NULL;
goto out;
}
@@ -3982,8 +3965,6 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start,
mutex_lock(&fs_info->cleaner_mutex);
ret = relocate_block_group(rc);
mutex_unlock(&fs_info->cleaner_mutex);
- if (ret < 0)
- err = ret;
finishes_stage = rc->stage;
/*
@@ -3996,16 +3977,18 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start,
* out of the loop if we hit an error.
*/
if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
- ret = btrfs_wait_ordered_range(BTRFS_I(rc->data_inode), 0,
- (u64)-1);
- if (ret)
- err = ret;
+ int wb_ret;
+
+ wb_ret = btrfs_wait_ordered_range(BTRFS_I(rc->data_inode), 0,
+ (u64)-1);
+ if (wb_ret && ret == 0)
+ ret = wb_ret;
invalidate_mapping_pages(rc->data_inode->i_mapping,
0, -1);
rc->stage = UPDATE_DATA_PTRS;
}
- if (err < 0)
+ if (ret < 0)
goto out;
if (rc->extents_found == 0)
@@ -4021,14 +4004,14 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start,
WARN_ON(rc->block_group->reserved > 0);
WARN_ON(rc->block_group->used > 0);
out:
- if (err && rw)
+ if (ret && bg_is_ro)
btrfs_dec_block_group_ro(rc->block_group);
iput(rc->data_inode);
reloc_chunk_end(fs_info);
out_put_bg:
btrfs_put_block_group(bg);
free_reloc_control(rc);
- return err;
+ return ret;
}
static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index d07eab70f759..6a7e297ab0a7 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -147,8 +147,8 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
if (unlikely(ret > 0)) {
btrfs_crit(fs_info,
- "unable to find root key (%llu %u %llu) in tree %llu",
- key->objectid, key->type, key->offset, btrfs_root_id(root));
+ "unable to find root key " BTRFS_KEY_FMT " in tree %llu",
+ BTRFS_KEY_FMT_VALUE(key), btrfs_root_id(root));
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
return ret;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ba20d9286a34..a40ee41f42c6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -463,10 +463,10 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
refcount_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
sctx->fs_info = fs_info;
- sctx->extent_path.search_commit_root = 1;
- sctx->extent_path.skip_locking = 1;
- sctx->csum_path.search_commit_root = 1;
- sctx->csum_path.skip_locking = 1;
+ sctx->extent_path.search_commit_root = true;
+ sctx->extent_path.skip_locking = true;
+ sctx->csum_path.search_commit_root = true;
+ sctx->csum_path.skip_locking = true;
for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) {
int ret;
@@ -505,7 +505,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
struct btrfs_inode_item *inode_item;
struct scrub_warning *swarn = warn_ctx;
struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
- struct inode_fs_paths *ipath = NULL;
+ struct inode_fs_paths *ipath __free(inode_fs_paths) = NULL;
struct btrfs_root *local_root;
struct btrfs_key key;
@@ -569,7 +569,6 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
(char *)(unsigned long)ipath->fspath->val[i]);
btrfs_put_root(local_root);
- free_ipath(ipath);
return 0;
err:
@@ -580,7 +579,6 @@ err:
swarn->physical,
root, inum, offset, ret);
- free_ipath(ipath);
return 0;
}
@@ -777,10 +775,10 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
-"scrub: tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
+"scrub: tree block %llu mirror %u has bad csum, has " BTRFS_CSUM_FMT " want " BTRFS_CSUM_FMT,
logical, stripe->mirror_num,
- CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum),
- CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum));
+ BTRFS_CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum),
+ BTRFS_CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum));
return;
}
if (stripe->sectors[sector_nr].generation !=
@@ -929,10 +927,11 @@ static int calc_next_mirror(int mirror, int num_copies)
static void scrub_bio_add_sector(struct btrfs_bio *bbio, struct scrub_stripe *stripe,
int sector_nr)
{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
int ret;
- ret = bio_add_page(&bbio->bio, virt_to_page(kaddr), bbio->fs_info->sectorsize,
+ ret = bio_add_page(&bbio->bio, virt_to_page(kaddr), fs_info->sectorsize,
offset_in_page(kaddr));
/*
* Caller should ensure the bbio has enough size.
@@ -942,7 +941,21 @@ static void scrub_bio_add_sector(struct btrfs_bio *bbio, struct scrub_stripe *st
* to create the minimal amount of bio vectors, for fs block size < page
* size cases.
*/
- ASSERT(ret == bbio->fs_info->sectorsize);
+ ASSERT(ret == fs_info->sectorsize);
+}
+
+static struct btrfs_bio *alloc_scrub_bbio(struct btrfs_fs_info *fs_info,
+ unsigned int nr_vecs, blk_opf_t opf,
+ u64 logical,
+ btrfs_bio_end_io_t end_io, void *private)
+{
+ struct btrfs_bio *bbio;
+
+ bbio = btrfs_bio_alloc(nr_vecs, opf, BTRFS_I(fs_info->btree_inode),
+ logical, end_io, private);
+ bbio->is_scrub = true;
+ bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
+ return bbio;
}
static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
@@ -953,8 +966,9 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
const unsigned long old_error_bitmap = scrub_bitmap_read_error(stripe);
int i;
- ASSERT(stripe->mirror_num >= 1);
- ASSERT(atomic_read(&stripe->pending_io) == 0);
+ ASSERT(stripe->mirror_num >= 1, "stripe->mirror_num=%d", stripe->mirror_num);
+ ASSERT(atomic_read(&stripe->pending_io) == 0,
+ "atomic_read(&stripe->pending_io)=%d", atomic_read(&stripe->pending_io));
for_each_set_bit(i, &old_error_bitmap, stripe->nr_sectors) {
/* The current sector cannot be merged, submit the bio. */
@@ -968,12 +982,10 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
bbio = NULL;
}
- if (!bbio) {
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
- fs_info, scrub_repair_read_endio, stripe);
- bbio->bio.bi_iter.bi_sector = (stripe->logical +
- (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT;
- }
+ if (!bbio)
+ bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_READ,
+ stripe->logical + (i << fs_info->sectorsize_bits),
+ scrub_repair_read_endio, stripe);
scrub_bio_add_sector(bbio, stripe, i);
}
@@ -1019,7 +1031,7 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
int ret;
/* For scrub, our mirror_num should always start at 1. */
- ASSERT(stripe->mirror_num >= 1);
+ ASSERT(stripe->mirror_num >= 1, "stripe->mirror_num=%d", stripe->mirror_num);
ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
stripe->logical, &mapped_len, &bioc,
NULL, NULL);
@@ -1159,7 +1171,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
int mirror;
int i;
- ASSERT(stripe->mirror_num > 0);
+ ASSERT(stripe->mirror_num >= 1, "stripe->mirror_num=%d", stripe->mirror_num);
wait_scrub_stripe_io(stripe);
scrub_verify_one_stripe(stripe, scrub_bitmap_read_has_extent(stripe));
@@ -1284,7 +1296,7 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
bitmap_set(&stripe->write_error_bitmap, sector_nr,
bio_size >> fs_info->sectorsize_bits);
spin_unlock_irqrestore(&stripe->write_error_lock, flags);
- for (int i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++)
+ for (i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++)
btrfs_dev_stat_inc_and_print(stripe->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
}
@@ -1352,13 +1364,10 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
bbio = NULL;
}
- if (!bbio) {
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_WRITE,
- fs_info, scrub_write_endio, stripe);
- bbio->bio.bi_iter.bi_sector = (stripe->logical +
- (sector_nr << fs_info->sectorsize_bits)) >>
- SECTOR_SHIFT;
- }
+ if (!bbio)
+ bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_WRITE,
+ stripe->logical + (sector_nr << fs_info->sectorsize_bits),
+ scrub_write_endio, stripe);
scrub_bio_add_sector(bbio, stripe, sector_nr);
}
if (bbio)
@@ -1478,7 +1487,7 @@ static int compare_extent_item_range(struct btrfs_path *path,
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
ASSERT(key.type == BTRFS_EXTENT_ITEM_KEY ||
- key.type == BTRFS_METADATA_ITEM_KEY);
+ key.type == BTRFS_METADATA_ITEM_KEY, "key.type=%u", key.type);
if (key.type == BTRFS_METADATA_ITEM_KEY)
len = fs_info->nodesize;
else
@@ -1583,7 +1592,7 @@ static void get_extent_info(struct btrfs_path *path, u64 *extent_start_ret,
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
ASSERT(key.type == BTRFS_METADATA_ITEM_KEY ||
- key.type == BTRFS_EXTENT_ITEM_KEY);
+ key.type == BTRFS_EXTENT_ITEM_KEY, "key.type=%u", key.type);
*extent_start_ret = key.objectid;
if (key.type == BTRFS_METADATA_ITEM_KEY)
*size_ret = path->nodes[0]->fs_info->nodesize;
@@ -1681,7 +1690,9 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
scrub_stripe_reset_bitmaps(stripe);
/* The range must be inside the bg. */
- ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);
+ ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length,
+ "bg->start=%llu logical_start=%llu logical_end=%llu end=%llu",
+ bg->start, logical_start, logical_end, bg->start + bg->length);
ret = find_first_extent_item(extent_root, extent_path, logical_start,
logical_len);
@@ -1849,9 +1860,8 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
continue;
}
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
- fs_info, scrub_read_endio, stripe);
- bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
+ bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_READ,
+ logical, scrub_read_endio, stripe);
}
scrub_bio_add_sector(bbio, stripe, i);
@@ -1888,10 +1898,8 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
return;
}
- bbio = btrfs_bio_alloc(BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ, fs_info,
- scrub_read_endio, stripe);
-
- bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
+ bbio = alloc_scrub_bbio(fs_info, BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ,
+ stripe->logical, scrub_read_endio, stripe);
/* Read the whole range inside the chunk boundary. */
for (unsigned int cur = 0; cur < nr_sectors; cur++)
scrub_bio_add_sector(bbio, stripe, cur);
@@ -2069,37 +2077,135 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
return 0;
}
+/*
+ * Return 0 if we should not cancel the scrub.
+ * Return <0 if we need to cancel the scrub, returned value will
+ * indicate the reason:
+ * - -ECANCELED - Being explicitly canceled through ioctl.
+ * - -EINTR - Being interrupted by signal or fs/process freezing.
+ */
+static int should_cancel_scrub(const struct scrub_ctx *sctx)
+{
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+
+ if (atomic_read(&fs_info->scrub_cancel_req) ||
+ atomic_read(&sctx->cancel_req))
+ return -ECANCELED;
+
+ /*
+ * The user (e.g. fsfreeze command) or power management (PM)
+ * suspend/hibernate can freeze the fs. And PM suspend/hibernate will
+ * also freeze all user processes.
+ *
+ * A user process can only be frozen when it is in user space, thus we
+ * have to cancel the run so that the process can return to the user
+ * space.
+ *
+ * Furthermore we have to check both filesystem and process freezing,
+ * as PM can be configured to freeze the filesystems before processes.
+ *
+ * If we only check fs freezing, then suspend without fs freezing
+ * will time out, as the process is still in kernel space.
+ *
+ * If we only check process freezing, then suspend with fs freezing
+ * will time out, as the running scrub will prevent the fs from being frozen.
+ */
+ if (fs_info->sb->s_writers.frozen > SB_UNFROZEN ||
+ freezing(current) || signal_pending(current))
+ return -EINTR;
+ return 0;
+}
+
+static int scrub_raid56_cached_parity(struct scrub_ctx *sctx,
+ struct btrfs_device *scrub_dev,
+ struct btrfs_chunk_map *map,
+ u64 full_stripe_start,
+ unsigned long *extent_bitmap)
+{
+ DECLARE_COMPLETION_ONSTACK(io_done);
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ struct btrfs_io_context *bioc = NULL;
+ struct btrfs_raid_bio *rbio;
+ struct bio bio;
+ const int data_stripes = nr_data_stripes(map);
+ u64 length = btrfs_stripe_nr_to_offset(data_stripes);
+ int ret;
+
+ bio_init(&bio, NULL, NULL, 0, REQ_OP_READ);
+ bio.bi_iter.bi_sector = full_stripe_start >> SECTOR_SHIFT;
+ bio.bi_private = &io_done;
+ bio.bi_end_io = raid56_scrub_wait_endio;
+
+ btrfs_bio_counter_inc_blocked(fs_info);
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
+ &length, &bioc, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ /* For RAID56 write there must be an @bioc allocated. */
+ ASSERT(bioc);
+ rbio = raid56_parity_alloc_scrub_rbio(&bio, bioc, scrub_dev, extent_bitmap,
+ BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
+ btrfs_put_bioc(bioc);
+ if (!rbio) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* Use the recovered stripes as cache to avoid reading them from disk again. */
+ for (int i = 0; i < data_stripes; i++) {
+ struct scrub_stripe *stripe = &sctx->raid56_data_stripes[i];
+
+ raid56_parity_cache_data_folios(rbio, stripe->folios,
+ full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
+ }
+ raid56_parity_submit_scrub_rbio(rbio);
+ wait_for_completion_io(&io_done);
+ ret = blk_status_to_errno(bio.bi_status);
+out:
+ btrfs_bio_counter_dec(fs_info);
+ bio_uninit(&bio);
+ return ret;
+}
+
static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev,
struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
u64 full_stripe_start)
{
- DECLARE_COMPLETION_ONSTACK(io_done);
struct btrfs_fs_info *fs_info = sctx->fs_info;
- struct btrfs_raid_bio *rbio;
- struct btrfs_io_context *bioc = NULL;
struct btrfs_path extent_path = { 0 };
struct btrfs_path csum_path = { 0 };
- struct bio *bio;
struct scrub_stripe *stripe;
bool all_empty = true;
const int data_stripes = nr_data_stripes(map);
unsigned long extent_bitmap = 0;
- u64 length = btrfs_stripe_nr_to_offset(data_stripes);
int ret;
ASSERT(sctx->raid56_data_stripes);
+ ret = should_cancel_scrub(sctx);
+ if (ret < 0)
+ return ret;
+
+ if (atomic_read(&fs_info->scrub_pause_req))
+ scrub_blocked_if_needed(fs_info);
+
+ spin_lock(&bg->lock);
+ if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) {
+ spin_unlock(&bg->lock);
+ return 0;
+ }
+ spin_unlock(&bg->lock);
+
/*
* For data stripe search, we cannot reuse the same extent/csum paths,
* as the data stripe bytenr may be smaller than previous extent. Thus
* we have to use our own extent/csum paths.
*/
- extent_path.search_commit_root = 1;
- extent_path.skip_locking = 1;
- csum_path.search_commit_root = 1;
- csum_path.skip_locking = 1;
+ extent_path.search_commit_root = true;
+ extent_path.skip_locking = true;
+ csum_path.search_commit_root = true;
+ csum_path.skip_locking = true;
for (int i = 0; i < data_stripes; i++) {
int stripe_index;
@@ -2194,45 +2300,11 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
}
/* Now we can check and regenerate the P/Q stripe. */
- bio = bio_alloc(NULL, 1, REQ_OP_READ, GFP_NOFS);
- bio->bi_iter.bi_sector = full_stripe_start >> SECTOR_SHIFT;
- bio->bi_private = &io_done;
- bio->bi_end_io = raid56_scrub_wait_endio;
-
- btrfs_bio_counter_inc_blocked(fs_info);
- ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
- &length, &bioc, NULL, NULL);
- if (ret < 0) {
- bio_put(bio);
- btrfs_put_bioc(bioc);
- btrfs_bio_counter_dec(fs_info);
- goto out;
- }
- rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, scrub_dev, &extent_bitmap,
- BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
- btrfs_put_bioc(bioc);
- if (!rbio) {
- ret = -ENOMEM;
- bio_put(bio);
- btrfs_bio_counter_dec(fs_info);
- goto out;
- }
- /* Use the recovered stripes as cache to avoid read them from disk again. */
- for (int i = 0; i < data_stripes; i++) {
- stripe = &sctx->raid56_data_stripes[i];
-
- raid56_parity_cache_data_folios(rbio, stripe->folios,
- full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
- }
- raid56_parity_submit_scrub_rbio(rbio);
- wait_for_completion_io(&io_done);
- ret = blk_status_to_errno(bio->bi_status);
- bio_put(bio);
- btrfs_bio_counter_dec(fs_info);
-
+ ret = scrub_raid56_cached_parity(sctx, scrub_dev, map, full_stripe_start,
+ &extent_bitmap);
+out:
btrfs_release_path(&extent_path);
btrfs_release_path(&csum_path);
-out:
return ret;
}
@@ -2263,18 +2335,13 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
u64 found_logical = U64_MAX;
u64 cur_physical = physical + cur_logical - logical_start;
- /* Canceled? */
- if (atomic_read(&fs_info->scrub_cancel_req) ||
- atomic_read(&sctx->cancel_req)) {
- ret = -ECANCELED;
+ ret = should_cancel_scrub(sctx);
+ if (ret < 0)
break;
- }
- /* Paused? */
- if (atomic_read(&fs_info->scrub_pause_req)) {
- /* Push queued extents */
+
+ if (atomic_read(&fs_info->scrub_pause_req))
scrub_blocked_if_needed(fs_info);
- }
- /* Block group removed? */
+
spin_lock(&bg->lock);
if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) {
spin_unlock(&bg->lock);
@@ -2529,8 +2596,6 @@ out:
}
if (sctx->is_dev_replace && ret >= 0) {
- int ret2;
-
ret2 = sync_write_pointer_for_zoned(sctx,
chunk_logical + offset,
map->stripes[stripe_index].physical,
@@ -2623,8 +2688,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
return -ENOMEM;
path->reada = READA_FORWARD;
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
key.objectid = scrub_dev->devid;
key.type = BTRFS_DEV_EXTENT_KEY;
@@ -3039,6 +3104,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
unsigned int nofs_flag;
bool need_commit = false;
+ /* Set the basic fallback @last_physical before we get a sctx. */
+ if (progress)
+ progress->last_physical = start;
+
if (btrfs_fs_closing(fs_info))
return -EAGAIN;
@@ -3057,6 +3126,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
sctx = scrub_setup_ctx(fs_info, is_dev_replace);
if (IS_ERR(sctx))
return PTR_ERR(sctx);
+ sctx->stat.last_physical = start;
ret = scrub_workers_get(fs_info);
if (ret)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 96a030d28e09..2522faa97478 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -47,28 +47,30 @@
* It allows fast adding of path elements on the right side (normal path) and
* fast adding to the left side (reversed path). A reversed path can also be
* unreversed if needed.
+ *
+ * The definition of struct fs_path relies on -fms-extensions to allow
+ * including a tagged struct as an anonymous member.
*/
+struct __fs_path {
+ char *start;
+ char *end;
+
+ char *buf;
+ unsigned short buf_len:15;
+ unsigned short reversed:1;
+};
+static_assert(sizeof(struct __fs_path) < 256);
struct fs_path {
- union {
- struct {
- char *start;
- char *end;
-
- char *buf;
- unsigned short buf_len:15;
- unsigned short reversed:1;
- char inline_buf[];
- };
- /*
- * Average path length does not exceed 200 bytes, we'll have
- * better packing in the slab and higher chance to satisfy
- * an allocation later during send.
- */
- char pad[256];
- };
+ struct __fs_path;
+ /*
+ * Average path length does not exceed 200 bytes, we'll have
+ * better packing in the slab and higher chance to satisfy
+ * an allocation later during send.
+ */
+ char inline_buf[256 - sizeof(struct __fs_path)];
};
#define FS_PATH_INLINE_SIZE \
- (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
+ sizeof_field(struct fs_path, inline_buf)
/* reused for each extent */
@@ -305,7 +307,6 @@ struct send_ctx {
struct btrfs_lru_cache dir_created_cache;
struct btrfs_lru_cache dir_utimes_cache;
- /* Must be last as it ends in a flexible-array member. */
struct fs_path cur_inode_path;
};
@@ -633,9 +634,9 @@ static struct btrfs_path *alloc_path_for_send(void)
path = btrfs_alloc_path();
if (!path)
return NULL;
- path->search_commit_root = 1;
- path->skip_locking = 1;
- path->need_commit_sem = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
+ path->need_commit_sem = true;
return path;
}
@@ -1053,10 +1054,8 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
}
if (unlikely(start < p->buf)) {
btrfs_err(root->fs_info,
- "send: path ref buffer underflow for key (%llu %u %llu)",
- found_key->objectid,
- found_key->type,
- found_key->offset);
+ "send: path ref buffer underflow for key " BTRFS_KEY_FMT,
+ BTRFS_KEY_FMT_VALUE(found_key));
ret = -EINVAL;
goto out;
}
@@ -1136,12 +1135,12 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
btrfs_dir_item_key_to_cpu(eb, di, &di_key);
if (btrfs_dir_ftype(eb, di) == BTRFS_FT_XATTR) {
- if (name_len > XATTR_NAME_MAX) {
+ if (unlikely(name_len > XATTR_NAME_MAX)) {
ret = -ENAMETOOLONG;
goto out;
}
- if (name_len + data_len >
- BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
+ if (unlikely(name_len + data_len >
+ BTRFS_MAX_XATTR_SIZE(root->fs_info))) {
ret = -E2BIG;
goto out;
}
@@ -1149,7 +1148,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
/*
* Path too long
*/
- if (name_len + data_len > PATH_MAX) {
+ if (unlikely(name_len + data_len > PATH_MAX)) {
ret = -ENAMETOOLONG;
goto out;
}
@@ -2460,7 +2459,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
struct btrfs_key key;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
- char *name = NULL;
+ char AUTO_KFREE(name);
int namelen;
path = btrfs_alloc_path();
@@ -2478,18 +2477,15 @@ static int send_subvol_begin(struct send_ctx *sctx)
ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
&key, path, 1, 0);
if (ret < 0)
- goto out;
- if (ret) {
- ret = -ENOENT;
- goto out;
- }
+ return ret;
+ if (ret)
+ return -ENOENT;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.type != BTRFS_ROOT_BACKREF_KEY ||
key.objectid != btrfs_root_id(send_root)) {
- ret = -ENOENT;
- goto out;
+ return -ENOENT;
}
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
namelen = btrfs_root_ref_name_len(leaf, ref);
@@ -2499,11 +2495,11 @@ static int send_subvol_begin(struct send_ctx *sctx)
if (parent_root) {
ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
if (ret < 0)
- goto out;
+ return ret;
} else {
ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
if (ret < 0)
- goto out;
+ return ret;
}
TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
@@ -2531,8 +2527,6 @@ static int send_subvol_begin(struct send_ctx *sctx)
ret = send_cmd(sctx);
tlv_put_failure:
-out:
- kfree(name);
return ret;
}
@@ -4079,7 +4073,7 @@ static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
*/
static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
{
- char *name;
+ char AUTO_KFREE(name);
int ret;
name = kmemdup(ref->name, ref->name_len, GFP_KERNEL);
@@ -4089,17 +4083,16 @@ static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
fs_path_reset(ref->full_path);
ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path);
if (ret < 0)
- goto out;
+ return ret;
ret = fs_path_add(ref->full_path, name, ref->name_len);
if (ret < 0)
- goto out;
+ return ret;
/* Update the reference's base name pointer. */
set_ref_path(ref, ref->full_path);
-out:
- kfree(name);
- return ret;
+
+ return 0;
}
static int rbtree_check_dir_ref_comp(const void *k, const struct rb_node *node)
@@ -4951,6 +4944,7 @@ struct find_xattr_ctx {
int found_idx;
char *found_data;
int found_data_len;
+ bool copy_data;
};
static int __find_xattr(int num, struct btrfs_key *di_key, const char *name,
@@ -4962,9 +4956,11 @@ static int __find_xattr(int num, struct btrfs_key *di_key, const char *name,
strncmp(name, ctx->name, name_len) == 0) {
ctx->found_idx = num;
ctx->found_data_len = data_len;
- ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
- if (!ctx->found_data)
- return -ENOMEM;
+ if (ctx->copy_data) {
+ ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
+ if (!ctx->found_data)
+ return -ENOMEM;
+ }
return 1;
}
return 0;
@@ -4984,6 +4980,7 @@ static int find_xattr(struct btrfs_root *root,
ctx.found_idx = -1;
ctx.found_data = NULL;
ctx.found_data_len = 0;
+ ctx.copy_data = (data != NULL);
ret = iterate_dir_item(root, path, __find_xattr, &ctx);
if (ret < 0)
@@ -4995,7 +4992,7 @@ static int find_xattr(struct btrfs_root *root,
*data = ctx.found_data;
*data_len = ctx.found_data_len;
} else {
- kfree(ctx.found_data);
+ ASSERT(ctx.found_data == NULL);
}
return ctx.found_idx;
}
@@ -5008,8 +5005,8 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
{
int ret;
struct send_ctx *sctx = ctx;
- char *found_data = NULL;
- int found_data_len = 0;
+ char AUTO_KFREE(found_data);
+ int found_data_len = 0;
ret = find_xattr(sctx->parent_root, sctx->right_path,
sctx->cmp_key, name, name_len, &found_data,
@@ -5027,7 +5024,6 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
}
}
- kfree(found_data);
return ret;
}
@@ -5138,7 +5134,7 @@ static int process_verity(struct send_ctx *sctx)
if (ret < 0)
goto iput;
- if (ret > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
+ if (unlikely(ret > FS_VERITY_MAX_DESCRIPTOR_SIZE)) {
ret = -EMSGSIZE;
goto iput;
}
@@ -5182,14 +5178,14 @@ static int put_data_header(struct send_ctx *sctx, u32 len)
* Since v2, the data attribute header doesn't include a length,
* it is implicitly to the end of the command.
*/
- if (sctx->send_max_size - sctx->send_size < sizeof(__le16) + len)
+ if (unlikely(sctx->send_max_size - sctx->send_size < sizeof(__le16) + len))
return -EOVERFLOW;
put_unaligned_le16(BTRFS_SEND_A_DATA, sctx->send_buf + sctx->send_size);
sctx->send_size += sizeof(__le16);
} else {
struct btrfs_tlv_header *hdr;
- if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
+ if (unlikely(sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len))
return -EOVERFLOW;
hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
@@ -5589,8 +5585,8 @@ static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
* between the beginning of the command and the file data.
*/
data_offset = PAGE_ALIGN(sctx->send_size);
- if (data_offset > sctx->send_max_size ||
- sctx->send_max_size - data_offset < disk_num_bytes) {
+ if (unlikely(data_offset > sctx->send_max_size ||
+ sctx->send_max_size - data_offset < disk_num_bytes)) {
ret = -EOVERFLOW;
goto out;
}
@@ -5643,14 +5639,7 @@ static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- /*
- * Do not go through encoded read for bs > ps cases.
- *
- * Encoded send is using vmallocated pages as buffer, which we can
- * not ensure every folio is large enough to contain a block.
- */
- if (sctx->send_root->fs_info->sectorsize <= PAGE_SIZE &&
- (sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
+ if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
bool is_inline = (btrfs_file_extent_type(leaf, ei) ==
BTRFS_FILE_EXTENT_INLINE);
@@ -5764,7 +5753,7 @@ static int send_capabilities(struct send_ctx *sctx)
struct btrfs_dir_item *di;
struct extent_buffer *leaf;
unsigned long data_ptr;
- char *buf = NULL;
+ char AUTO_KFREE(buf);
int buf_len;
int ret = 0;
@@ -5776,28 +5765,23 @@ static int send_capabilities(struct send_ctx *sctx)
XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
if (!di) {
/* There is no xattr for this inode */
- goto out;
+ return 0;
} else if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
+ return PTR_ERR(di);
}
leaf = path->nodes[0];
buf_len = btrfs_dir_data_len(leaf, di);
buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!buf)
+ return -ENOMEM;
data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
read_extent_buffer(leaf, buf, data_ptr, buf_len);
ret = send_set_xattr(sctx, XATTR_NAME_CAPS,
strlen(XATTR_NAME_CAPS), buf, buf_len);
-out:
- kfree(buf);
return ret;
}
@@ -7274,8 +7258,8 @@ static int search_key_again(const struct send_ctx *sctx,
if (unlikely(ret > 0)) {
btrfs_print_tree(path->nodes[path->lowest_level], false);
btrfs_err(root->fs_info,
-"send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d",
- key->objectid, key->type, key->offset,
+"send: key " BTRFS_KEY_FMT" not found in %s root %llu, lowest_level %d, slot %d",
+ BTRFS_KEY_FMT_VALUE(key),
(root == sctx->parent_root ? "parent" : "send"),
btrfs_root_id(root), path->lowest_level,
path->slots[path->lowest_level]);
@@ -7643,10 +7627,10 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
}
- left_path->search_commit_root = 1;
- left_path->skip_locking = 1;
- right_path->search_commit_root = 1;
- right_path->skip_locking = 1;
+ left_path->search_commit_root = true;
+ left_path->skip_locking = true;
+ right_path->search_commit_root = true;
+ right_path->skip_locking = true;
/*
* Strategy: Go to the first items of both trees. Then do
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 97452fb5d29b..6babbe333741 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -15,6 +15,7 @@
#include "accessors.h"
#include "extent-tree.h"
#include "zoned.h"
+#include "delayed-inode.h"
/*
* HOW DOES SPACE RESERVATION WORK
@@ -67,7 +68,7 @@
* Assume we are unable to simply make the reservation because we do not have
* enough space
*
- * -> __reserve_bytes
+ * -> reserve_bytes
* create a reserve_ticket with ->bytes set to our reservation, add it to
* the tail of space_info->tickets, kick async flush thread
*
@@ -172,15 +173,14 @@
* thing with or without extra unallocated space.
*/
-u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info,
- bool may_use_included)
-{
- ASSERT(s_info);
- return s_info->bytes_used + s_info->bytes_reserved +
- s_info->bytes_pinned + s_info->bytes_readonly +
- s_info->bytes_zone_unusable +
- (may_use_included ? s_info->bytes_may_use : 0);
-}
+struct reserve_ticket {
+ u64 bytes;
+ int error;
+ bool steal;
+ struct list_head list;
+ wait_queue_head_t wait;
+ spinlock_t lock;
+};
/*
* after adding space to the filesystem, we need to clear the full flags
@@ -192,7 +192,7 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
struct btrfs_space_info *found;
list_for_each_entry(found, head, list)
- found->full = 0;
+ found->full = false;
}
/*
@@ -211,7 +211,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
if (btrfs_is_zoned(fs_info))
return fs_info->zone_size;
- ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+ ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK, "flags=%llu", flags);
if (flags & BTRFS_BLOCK_GROUP_DATA)
return BTRFS_MAX_DATA_CHUNK_SIZE;
@@ -262,8 +262,9 @@ static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flag
struct btrfs_space_info *sub_group;
int ret;
- ASSERT(parent->subgroup_id == BTRFS_SUB_GROUP_PRIMARY);
- ASSERT(id != BTRFS_SUB_GROUP_PRIMARY);
+ ASSERT(parent->subgroup_id == BTRFS_SUB_GROUP_PRIMARY,
+ "parent->subgroup_id=%d", parent->subgroup_id);
+ ASSERT(id != BTRFS_SUB_GROUP_PRIMARY, "id=%d", id);
sub_group = kzalloc(sizeof(*sub_group), GFP_NOFS);
if (!sub_group)
@@ -274,7 +275,7 @@ static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flag
sub_group->parent = parent;
sub_group->subgroup_id = id;
- ret = btrfs_sysfs_add_space_info_type(fs_info, sub_group);
+ ret = btrfs_sysfs_add_space_info_type(sub_group);
if (ret) {
kfree(sub_group);
parent->sub_group[index] = NULL;
@@ -308,7 +309,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
return ret;
}
- ret = btrfs_sysfs_add_space_info_type(info, space_info);
+ ret = btrfs_sysfs_add_space_info_type(space_info);
if (ret)
return ret;
@@ -372,8 +373,8 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
space_info->bytes_readonly += block_group->bytes_super;
btrfs_space_info_update_bytes_zone_unusable(space_info, block_group->zone_unusable);
if (block_group->length > 0)
- space_info->full = 0;
- btrfs_try_granting_tickets(info, space_info);
+ space_info->full = false;
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
block_group->space_info = space_info;
@@ -421,10 +422,10 @@ static u64 calc_effective_data_chunk_size(struct btrfs_fs_info *fs_info)
return min_t(u64, data_chunk_size, SZ_1G);
}
-static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *space_info,
- enum btrfs_reserve_flush_enum flush)
+static u64 calc_available_free_space(const struct btrfs_space_info *space_info,
+ enum btrfs_reserve_flush_enum flush)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
u64 profile;
u64 avail;
u64 data_chunk_size;
@@ -490,44 +491,77 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
return avail;
}
-int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *space_info, u64 bytes,
- enum btrfs_reserve_flush_enum flush)
+static inline bool check_can_overcommit(const struct btrfs_space_info *space_info,
+ u64 space_info_used_bytes, u64 bytes,
+ enum btrfs_reserve_flush_enum flush)
+{
+ const u64 avail = calc_available_free_space(space_info, flush);
+
+ return (space_info_used_bytes + bytes < space_info->total_bytes + avail);
+}
+
+static inline bool can_overcommit(const struct btrfs_space_info *space_info,
+ u64 space_info_used_bytes, u64 bytes,
+ enum btrfs_reserve_flush_enum flush)
+{
+ /* Don't overcommit when in mixed mode. */
+ if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
+ return false;
+
+ return check_can_overcommit(space_info, space_info_used_bytes, bytes, flush);
+}
+
+bool btrfs_can_overcommit(const struct btrfs_space_info *space_info, u64 bytes,
+ enum btrfs_reserve_flush_enum flush)
{
- u64 avail;
u64 used;
/* Don't overcommit when in mixed mode */
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
- return 0;
+ return false;
used = btrfs_space_info_used(space_info, true);
- avail = calc_available_free_space(fs_info, space_info, flush);
- if (used + bytes < space_info->total_bytes + avail)
- return 1;
- return 0;
+ return check_can_overcommit(space_info, used, bytes, flush);
}
static void remove_ticket(struct btrfs_space_info *space_info,
- struct reserve_ticket *ticket)
+ struct reserve_ticket *ticket, int error)
{
+ lockdep_assert_held(&space_info->lock);
+
if (!list_empty(&ticket->list)) {
list_del_init(&ticket->list);
- ASSERT(space_info->reclaim_size >= ticket->bytes);
+ ASSERT(space_info->reclaim_size >= ticket->bytes,
+ "space_info->reclaim_size=%llu ticket->bytes=%llu",
+ space_info->reclaim_size, ticket->bytes);
space_info->reclaim_size -= ticket->bytes;
}
+
+ spin_lock(&ticket->lock);
+ /*
+ * If we are called from a task waiting on the ticket, it may happen
+ * that before it sets an error on the ticket, a reclaim task was able
+ * to satisfy the ticket. In that case ignore the error.
+ */
+ if (error && ticket->bytes > 0)
+ ticket->error = error;
+ else
+ ticket->bytes = 0;
+
+ wake_up(&ticket->wait);
+ spin_unlock(&ticket->lock);
}
/*
* This is for space we already have accounted in space_info->bytes_may_use, so
* basically when we're returning space from block_rsv's.
*/
-void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
+void btrfs_try_granting_tickets(struct btrfs_space_info *space_info)
{
struct list_head *head;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
+ u64 used = btrfs_space_info_used(space_info, true);
lockdep_assert_held(&space_info->lock);
@@ -535,19 +569,18 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
again:
while (!list_empty(head)) {
struct reserve_ticket *ticket;
- u64 used = btrfs_space_info_used(space_info, true);
+ u64 used_after;
ticket = list_first_entry(head, struct reserve_ticket, list);
+ used_after = used + ticket->bytes;
/* Check and see if our ticket can be satisfied now. */
- if ((used + ticket->bytes <= space_info->total_bytes) ||
- btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
- flush)) {
+ if (used_after <= space_info->total_bytes ||
+ can_overcommit(space_info, used, ticket->bytes, flush)) {
btrfs_space_info_update_bytes_may_use(space_info, ticket->bytes);
- remove_ticket(space_info, ticket);
- ticket->bytes = 0;
+ remove_ticket(space_info, ticket, 0);
space_info->tickets_id++;
- wake_up(&ticket->wait);
+ used = used_after;
} else {
break;
}
@@ -594,9 +627,9 @@ static void dump_global_block_rsv(struct btrfs_fs_info *fs_info)
DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
}
-static void __btrfs_dump_space_info(const struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *info)
+static void __btrfs_dump_space_info(const struct btrfs_space_info *info)
{
+ const struct btrfs_fs_info *fs_info = info->fs_info;
const char *flag_str = space_info_flag_to_str(info);
lockdep_assert_held(&info->lock);
@@ -613,16 +646,16 @@ static void __btrfs_dump_space_info(const struct btrfs_fs_info *fs_info,
info->bytes_readonly, info->bytes_zone_unusable);
}
-void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *info, u64 bytes,
+void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
bool dump_block_groups)
{
+ struct btrfs_fs_info *fs_info = info->fs_info;
struct btrfs_block_group *cache;
u64 total_avail = 0;
int index = 0;
spin_lock(&info->lock);
- __btrfs_dump_space_info(fs_info, info);
+ __btrfs_dump_space_info(info);
dump_global_block_rsv(fs_info);
spin_unlock(&info->lock);
@@ -670,11 +703,11 @@ static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info,
/*
* shrink metadata reservation for delalloc
*/
-static void shrink_delalloc(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+static void shrink_delalloc(struct btrfs_space_info *space_info,
u64 to_reclaim, bool wait_ordered,
bool for_preempt)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct btrfs_trans_handle *trans;
u64 delalloc_bytes;
u64 ordered_bytes;
@@ -801,10 +834,10 @@ skip_async:
* and may fail for various reasons. The caller is supposed to examine the
* state of @space_info to detect the outcome.
*/
-static void flush_space(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, u64 num_bytes,
- enum btrfs_flush_state state, bool for_preempt)
+static void flush_space(struct btrfs_space_info *space_info, u64 num_bytes,
+ enum btrfs_flush_state state, bool for_preempt)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_trans_handle *trans;
int nr;
@@ -833,7 +866,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
case FLUSH_DELALLOC_FULL:
if (state == FLUSH_DELALLOC_FULL)
num_bytes = U64_MAX;
- shrink_delalloc(fs_info, space_info, num_bytes,
+ shrink_delalloc(space_info, num_bytes,
state != FLUSH_DELALLOC, for_preempt);
break;
case FLUSH_DELAYED_REFS_NR:
@@ -900,8 +933,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
return;
}
-static u64 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *space_info)
+static u64 btrfs_calc_reclaim_metadata_size(const struct btrfs_space_info *space_info)
{
u64 used;
u64 avail;
@@ -909,8 +941,7 @@ static u64 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
lockdep_assert_held(&space_info->lock);
- avail = calc_available_free_space(fs_info, space_info,
- BTRFS_RESERVE_FLUSH_ALL);
+ avail = calc_available_free_space(space_info, BTRFS_RESERVE_FLUSH_ALL);
used = btrfs_space_info_used(space_info, true);
/*
@@ -925,18 +956,25 @@ static u64 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
return to_reclaim;
}
-static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *space_info)
+static bool need_preemptive_reclaim(const struct btrfs_space_info *space_info)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv);
u64 ordered, delalloc;
u64 thresh;
u64 used;
- thresh = mult_perc(space_info->total_bytes, 90);
-
lockdep_assert_held(&space_info->lock);
+ /*
+ * We have tickets queued, bail so we don't compete with the async
+ * flushers.
+ */
+ if (space_info->reclaim_size)
+ return false;
+
+ thresh = mult_perc(space_info->total_bytes, 90);
+
/* If we're just plain full then async reclaim just slows us down. */
if ((space_info->bytes_used + space_info->bytes_reserved +
global_rsv_size) >= thresh)
@@ -957,13 +995,6 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
return false;
/*
- * We have tickets queued, bail so we don't compete with the async
- * flushers.
- */
- if (space_info->reclaim_size)
- return false;
-
- /*
* If we have over half of the free space occupied by reservations or
* pinned then we want to start flushing.
*
@@ -992,8 +1023,7 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
* much delalloc we need for the background flusher to kick in.
*/
- thresh = calc_available_free_space(fs_info, space_info,
- BTRFS_RESERVE_FLUSH_ALL);
+ thresh = calc_available_free_space(space_info, BTRFS_RESERVE_FLUSH_ALL);
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_readonly + global_rsv_size;
if (used < space_info->total_bytes)
@@ -1037,13 +1067,15 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
-static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+static bool steal_from_global_rsv(struct btrfs_space_info *space_info,
struct reserve_ticket *ticket)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
u64 min_bytes;
+ lockdep_assert_held(&space_info->lock);
+
if (!ticket->steal)
return false;
@@ -1057,21 +1089,19 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
return false;
}
global_rsv->reserved -= ticket->bytes;
- remove_ticket(space_info, ticket);
- ticket->bytes = 0;
- wake_up(&ticket->wait);
- space_info->tickets_id++;
if (global_rsv->reserved < global_rsv->size)
- global_rsv->full = 0;
+ global_rsv->full = false;
spin_unlock(&global_rsv->lock);
+ remove_ticket(space_info, ticket, 0);
+ space_info->tickets_id++;
+
return true;
}
/*
* We've exhausted our flushing, start failing tickets.
*
- * @fs_info - fs_info for this fs
* @space_info - the space info we were flushing
*
* We call this when we've exhausted our flushing ability and haven't made
@@ -1084,47 +1114,44 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
* other tickets, or if it stumbles across a ticket that was smaller than the
* first ticket.
*/
-static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
+static bool maybe_fail_all_tickets(struct btrfs_space_info *space_info)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct reserve_ticket *ticket;
u64 tickets_id = space_info->tickets_id;
- const bool aborted = BTRFS_FS_ERROR(fs_info);
+ const int abort_error = BTRFS_FS_ERROR(fs_info);
trace_btrfs_fail_all_tickets(fs_info, space_info);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "cannot satisfy tickets, dumping space info");
- __btrfs_dump_space_info(fs_info, space_info);
+ __btrfs_dump_space_info(space_info);
}
while (!list_empty(&space_info->tickets) &&
tickets_id == space_info->tickets_id) {
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
+ if (unlikely(abort_error)) {
+ remove_ticket(space_info, ticket, abort_error);
+ } else {
+ if (steal_from_global_rsv(space_info, ticket))
+ return true;
- if (!aborted && steal_from_global_rsv(fs_info, space_info, ticket))
- return true;
-
- if (!aborted && btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- btrfs_info(fs_info, "failing ticket with %llu bytes",
- ticket->bytes);
+ if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
+ btrfs_info(fs_info, "failing ticket with %llu bytes",
+ ticket->bytes);
- remove_ticket(space_info, ticket);
- if (aborted)
- ticket->error = -EIO;
- else
- ticket->error = -ENOSPC;
- wake_up(&ticket->wait);
+ remove_ticket(space_info, ticket, -ENOSPC);
- /*
- * We're just throwing tickets away, so more flushing may not
- * trip over btrfs_try_granting_tickets, so we need to call it
- * here to see if we can make progress with the next ticket in
- * the list.
- */
- if (!aborted)
- btrfs_try_granting_tickets(fs_info, space_info);
+ /*
+ * We're just throwing tickets away, so more flushing may
+ * not trip over btrfs_try_granting_tickets, so we need
+ * to call it here to see if we can make progress with
+ * the next ticket in the list.
+ */
+ btrfs_try_granting_tickets(space_info);
+ }
}
return (tickets_id != space_info->tickets_id);
}
@@ -1144,9 +1171,9 @@ static void do_async_reclaim_metadata_space(struct btrfs_space_info *space_info)
final_state = COMMIT_TRANS;
spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(space_info);
if (!to_reclaim) {
- space_info->flush = 0;
+ space_info->flush = false;
spin_unlock(&space_info->lock);
return;
}
@@ -1155,15 +1182,14 @@ static void do_async_reclaim_metadata_space(struct btrfs_space_info *space_info)
flush_state = FLUSH_DELAYED_ITEMS_NR;
do {
- flush_space(fs_info, space_info, to_reclaim, flush_state, false);
+ flush_space(space_info, to_reclaim, flush_state, false);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
- space_info->flush = 0;
+ space_info->flush = false;
spin_unlock(&space_info->lock);
return;
}
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
- space_info);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(space_info);
if (last_tickets_id == space_info->tickets_id) {
flush_state++;
} else {
@@ -1197,11 +1223,11 @@ static void do_async_reclaim_metadata_space(struct btrfs_space_info *space_info)
if (flush_state > final_state) {
commit_cycles++;
if (commit_cycles > 2) {
- if (maybe_fail_all_tickets(fs_info, space_info)) {
+ if (maybe_fail_all_tickets(space_info)) {
flush_state = FLUSH_DELAYED_ITEMS_NR;
commit_cycles--;
} else {
- space_info->flush = 0;
+ space_info->flush = false;
}
} else {
flush_state = FLUSH_DELAYED_ITEMS_NR;
@@ -1257,14 +1283,15 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
trans_rsv = &fs_info->trans_block_rsv;
spin_lock(&space_info->lock);
- while (need_preemptive_reclaim(fs_info, space_info)) {
+ while (need_preemptive_reclaim(space_info)) {
enum btrfs_flush_state flush;
u64 delalloc_size = 0;
u64 to_reclaim, block_rsv_size;
const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv);
+ const u64 bytes_may_use = space_info->bytes_may_use;
+ const u64 bytes_pinned = space_info->bytes_pinned;
- loops++;
-
+ spin_unlock(&space_info->lock);
/*
* We don't have a precise counter for the metadata being
* reserved for delalloc, so we'll approximate it by subtracting
@@ -1276,8 +1303,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
btrfs_block_rsv_reserved(delayed_block_rsv) +
btrfs_block_rsv_reserved(delayed_refs_rsv) +
btrfs_block_rsv_reserved(trans_rsv);
- if (block_rsv_size < space_info->bytes_may_use)
- delalloc_size = space_info->bytes_may_use - block_rsv_size;
+ if (block_rsv_size < bytes_may_use)
+ delalloc_size = bytes_may_use - block_rsv_size;
/*
* We don't want to include the global_rsv in our calculation,
@@ -1294,10 +1321,10 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
if (delalloc_size > block_rsv_size) {
to_reclaim = delalloc_size;
flush = FLUSH_DELALLOC;
- } else if (space_info->bytes_pinned >
+ } else if (bytes_pinned >
(btrfs_block_rsv_reserved(delayed_block_rsv) +
btrfs_block_rsv_reserved(delayed_refs_rsv))) {
- to_reclaim = space_info->bytes_pinned;
+ to_reclaim = bytes_pinned;
flush = COMMIT_TRANS;
} else if (btrfs_block_rsv_reserved(delayed_block_rsv) >
btrfs_block_rsv_reserved(delayed_refs_rsv)) {
@@ -1308,7 +1335,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
flush = FLUSH_DELAYED_REFS_NR;
}
- spin_unlock(&space_info->lock);
+ loops++;
/*
* We don't want to reclaim everything, just a portion, so scale
@@ -1318,7 +1345,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
to_reclaim >>= 2;
if (!to_reclaim)
to_reclaim = btrfs_calc_insert_metadata_size(fs_info, 1);
- flush_space(fs_info, space_info, to_reclaim, flush, true);
+ flush_space(space_info, to_reclaim, flush, true);
cond_resched();
spin_lock(&space_info->lock);
}
@@ -1383,7 +1410,7 @@ static void do_async_reclaim_data_space(struct btrfs_space_info *space_info)
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
- space_info->flush = 0;
+ space_info->flush = false;
spin_unlock(&space_info->lock);
return;
}
@@ -1391,27 +1418,27 @@ static void do_async_reclaim_data_space(struct btrfs_space_info *space_info)
spin_unlock(&space_info->lock);
while (!space_info->full) {
- flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
+ flush_space(space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
- space_info->flush = 0;
+ space_info->flush = false;
spin_unlock(&space_info->lock);
return;
}
/* Something happened, fail everything and bail. */
- if (BTRFS_FS_ERROR(fs_info))
+ if (unlikely(BTRFS_FS_ERROR(fs_info)))
goto aborted_fs;
last_tickets_id = space_info->tickets_id;
spin_unlock(&space_info->lock);
}
while (flush_state < ARRAY_SIZE(data_flush_states)) {
- flush_space(fs_info, space_info, U64_MAX,
+ flush_space(space_info, U64_MAX,
data_flush_states[flush_state], false);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
- space_info->flush = 0;
+ space_info->flush = false;
spin_unlock(&space_info->lock);
return;
}
@@ -1425,16 +1452,16 @@ static void do_async_reclaim_data_space(struct btrfs_space_info *space_info)
if (flush_state >= ARRAY_SIZE(data_flush_states)) {
if (space_info->full) {
- if (maybe_fail_all_tickets(fs_info, space_info))
+ if (maybe_fail_all_tickets(space_info))
flush_state = 0;
else
- space_info->flush = 0;
+ space_info->flush = false;
} else {
flush_state = 0;
}
/* Something happened, fail everything and bail. */
- if (BTRFS_FS_ERROR(fs_info))
+ if (unlikely(BTRFS_FS_ERROR(fs_info)))
goto aborted_fs;
}
@@ -1443,8 +1470,8 @@ static void do_async_reclaim_data_space(struct btrfs_space_info *space_info)
return;
aborted_fs:
- maybe_fail_all_tickets(fs_info, space_info);
- space_info->flush = 0;
+ maybe_fail_all_tickets(space_info);
+ space_info->flush = false;
spin_unlock(&space_info->lock);
}
@@ -1489,40 +1516,47 @@ static const enum btrfs_flush_state evict_flush_states[] = {
RESET_ZONES,
};
-static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- struct reserve_ticket *ticket,
- const enum btrfs_flush_state *states,
- int states_nr)
+static bool is_ticket_served(struct reserve_ticket *ticket)
{
+ bool ret;
+
+ spin_lock(&ticket->lock);
+ ret = (ticket->bytes == 0);
+ spin_unlock(&ticket->lock);
+
+ return ret;
+}
+
+static void priority_reclaim_metadata_space(struct btrfs_space_info *space_info,
+ struct reserve_ticket *ticket,
+ const enum btrfs_flush_state *states,
+ int states_nr)
+{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
u64 to_reclaim;
int flush_state = 0;
- spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
/*
* This is the priority reclaim path, so to_reclaim could be >0 still
* because we may have only satisfied the priority tickets and still
* left non priority tickets on the list. We would then have
* to_reclaim but ->bytes == 0.
*/
- if (ticket->bytes == 0) {
- spin_unlock(&space_info->lock);
+ if (is_ticket_served(ticket))
return;
- }
+
+ spin_lock(&space_info->lock);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(space_info);
+ spin_unlock(&space_info->lock);
while (flush_state < states_nr) {
- spin_unlock(&space_info->lock);
- flush_space(fs_info, space_info, to_reclaim, states[flush_state],
- false);
- flush_state++;
- spin_lock(&space_info->lock);
- if (ticket->bytes == 0) {
- spin_unlock(&space_info->lock);
+ flush_space(space_info, to_reclaim, states[flush_state], false);
+ if (is_ticket_served(ticket))
return;
- }
+ flush_state++;
}
+ spin_lock(&space_info->lock);
/*
* Attempt to steal from the global rsv if we can, except if the fs was
* turned into error mode due to a transaction abort when flushing space
@@ -1531,48 +1565,38 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
* just to have caller fail immediately instead of later when trying to
* modify the fs, making it easier to debug -ENOSPC problems.
*/
- if (BTRFS_FS_ERROR(fs_info)) {
- ticket->error = BTRFS_FS_ERROR(fs_info);
- remove_ticket(space_info, ticket);
- } else if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
- ticket->error = -ENOSPC;
- remove_ticket(space_info, ticket);
- }
+ if (unlikely(BTRFS_FS_ERROR(fs_info)))
+ remove_ticket(space_info, ticket, BTRFS_FS_ERROR(fs_info));
+ else if (!steal_from_global_rsv(space_info, ticket))
+ remove_ticket(space_info, ticket, -ENOSPC);
/*
* We must run try_granting_tickets here because we could be a large
* ticket in front of a smaller ticket that can now be satisfied with
* the available space.
*/
- btrfs_try_granting_tickets(fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}
-static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+static void priority_reclaim_data_space(struct btrfs_space_info *space_info,
struct reserve_ticket *ticket)
{
- spin_lock(&space_info->lock);
-
/* We could have been granted before we got here. */
- if (ticket->bytes == 0) {
- spin_unlock(&space_info->lock);
+ if (is_ticket_served(ticket))
return;
- }
+ spin_lock(&space_info->lock);
while (!space_info->full) {
spin_unlock(&space_info->lock);
- flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
- spin_lock(&space_info->lock);
- if (ticket->bytes == 0) {
- spin_unlock(&space_info->lock);
+ flush_space(space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
+ if (is_ticket_served(ticket))
return;
- }
+ spin_lock(&space_info->lock);
}
- ticket->error = -ENOSPC;
- remove_ticket(space_info, ticket);
- btrfs_try_granting_tickets(fs_info, space_info);
+ remove_ticket(space_info, ticket, -ENOSPC);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}
@@ -1581,11 +1605,13 @@ static void wait_reserve_ticket(struct btrfs_space_info *space_info,
{
DEFINE_WAIT(wait);
- int ret = 0;
- spin_lock(&space_info->lock);
+ spin_lock(&ticket->lock);
while (ticket->bytes > 0 && ticket->error == 0) {
+ int ret;
+
ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
+ spin_unlock(&ticket->lock);
if (ret) {
/*
* Delete us from the list. After we unlock the space
@@ -1595,24 +1621,23 @@ static void wait_reserve_ticket(struct btrfs_space_info *space_info,
* despite getting an error, resulting in a space leak
* (bytes_may_use counter of our space_info).
*/
- remove_ticket(space_info, ticket);
- ticket->error = -EINTR;
- break;
+ spin_lock(&space_info->lock);
+ remove_ticket(space_info, ticket, -EINTR);
+ spin_unlock(&space_info->lock);
+ return;
}
- spin_unlock(&space_info->lock);
schedule();
finish_wait(&ticket->wait, &wait);
- spin_lock(&space_info->lock);
+ spin_lock(&ticket->lock);
}
- spin_unlock(&space_info->lock);
+ spin_unlock(&ticket->lock);
}
/*
* Do the appropriate flushing and waiting for a ticket.
*
- * @fs_info: the filesystem
* @space_info: space info for the reservation
* @ticket: ticket for the reservation
* @start_ns: timestamp when the reservation started
@@ -1622,8 +1647,7 @@ static void wait_reserve_ticket(struct btrfs_space_info *space_info,
* This does the work of figuring out how to flush for the ticket, waiting for
* the reservation, and returning the appropriate error if there is one.
*/
-static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+static int handle_reserve_ticket(struct btrfs_space_info *space_info,
struct reserve_ticket *ticket,
u64 start_ns, u64 orig_bytes,
enum btrfs_reserve_flush_enum flush)
@@ -1637,20 +1661,20 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
wait_reserve_ticket(space_info, ticket);
break;
case BTRFS_RESERVE_FLUSH_LIMIT:
- priority_reclaim_metadata_space(fs_info, space_info, ticket,
+ priority_reclaim_metadata_space(space_info, ticket,
priority_flush_states,
ARRAY_SIZE(priority_flush_states));
break;
case BTRFS_RESERVE_FLUSH_EVICT:
- priority_reclaim_metadata_space(fs_info, space_info, ticket,
+ priority_reclaim_metadata_space(space_info, ticket,
evict_flush_states,
ARRAY_SIZE(evict_flush_states));
break;
case BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE:
- priority_reclaim_data_space(fs_info, space_info, ticket);
+ priority_reclaim_data_space(space_info, ticket);
break;
default:
- ASSERT(0);
+ ASSERT(0, "flush=%d", flush);
break;
}
@@ -1662,9 +1686,10 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
* releasing reserved space (if an error happens the expectation is that
* space wasn't reserved at all).
*/
- ASSERT(!(ticket->bytes == 0 && ticket->error));
- trace_btrfs_reserve_ticket(fs_info, space_info->flags, orig_bytes,
- start_ns, flush, ticket->error);
+ ASSERT(!(ticket->bytes == 0 && ticket->error),
+ "ticket->bytes=%llu ticket->error=%d", ticket->bytes, ticket->error);
+ trace_btrfs_reserve_ticket(space_info->fs_info, space_info->flags,
+ orig_bytes, start_ns, flush, ticket->error);
return ret;
}
@@ -1678,9 +1703,9 @@ static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
(flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
}
-static inline void maybe_clamp_preempt(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
+static inline void maybe_clamp_preempt(struct btrfs_space_info *space_info)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
u64 ordered = percpu_counter_sum_positive(&fs_info->ordered_bytes);
u64 delalloc = percpu_counter_sum_positive(&fs_info->delalloc_bytes);
@@ -1715,7 +1740,6 @@ static inline bool can_ticket(enum btrfs_reserve_flush_enum flush)
/*
* Try to reserve bytes from the block_rsv's space.
*
- * @fs_info: the filesystem
* @space_info: space info we want to allocate from
* @orig_bytes: number of bytes we want
* @flush: whether or not we can flush to make our reservation
@@ -1727,10 +1751,10 @@ static inline bool can_ticket(enum btrfs_reserve_flush_enum flush)
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
-static int __reserve_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush)
+static int reserve_bytes(struct btrfs_space_info *space_info, u64 orig_bytes,
+ enum btrfs_reserve_flush_enum flush)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct work_struct *async_work;
struct reserve_ticket ticket;
u64 start_ns = 0;
@@ -1738,7 +1762,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
int ret = -ENOSPC;
bool pending_tickets;
- ASSERT(orig_bytes);
+ ASSERT(orig_bytes, "orig_bytes=%llu", orig_bytes);
/*
* If have a transaction handle (current->journal_info != NULL), then
* the flush method can not be neither BTRFS_RESERVE_FLUSH_ALL* nor
@@ -1747,9 +1771,9 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
*/
if (current->journal_info) {
/* One assert per line for easier debugging. */
- ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL);
- ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL_STEAL);
- ASSERT(flush != BTRFS_RESERVE_FLUSH_EVICT);
+ ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL, "flush=%d", flush);
+ ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL_STEAL, "flush=%d", flush);
+ ASSERT(flush != BTRFS_RESERVE_FLUSH_EVICT, "flush=%d", flush);
}
if (flush == BTRFS_RESERVE_FLUSH_DATA)
@@ -1777,7 +1801,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
*/
if (!pending_tickets &&
((used + orig_bytes <= space_info->total_bytes) ||
- btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
+ can_overcommit(space_info, used, orig_bytes, flush))) {
btrfs_space_info_update_bytes_may_use(space_info, orig_bytes);
ret = 0;
}
@@ -1788,7 +1812,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
* left to allocate for the block.
*/
if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) {
- used = btrfs_space_info_used(space_info, false);
+ used -= space_info->bytes_may_use;
if (used + orig_bytes <= space_info->total_bytes) {
btrfs_space_info_update_bytes_may_use(space_info, orig_bytes);
ret = 0;
@@ -1807,6 +1831,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
ticket.error = 0;
space_info->reclaim_size += ticket.bytes;
init_waitqueue_head(&ticket.wait);
+ spin_lock_init(&ticket.lock);
ticket.steal = can_steal(flush);
if (trace_btrfs_reserve_ticket_enabled())
start_ns = ktime_get_ns();
@@ -1823,9 +1848,9 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
* preemptive flushing in order to keep up with
* the workload.
*/
- maybe_clamp_preempt(fs_info, space_info);
+ maybe_clamp_preempt(space_info);
- space_info->flush = 1;
+ space_info->flush = true;
trace_btrfs_trigger_flush(fs_info,
space_info->flags,
orig_bytes, flush,
@@ -1844,7 +1869,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
*/
if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
!work_busy(&fs_info->preempt_reclaim_work) &&
- need_preemptive_reclaim(fs_info, space_info)) {
+ need_preemptive_reclaim(space_info)) {
trace_btrfs_trigger_flush(fs_info, space_info->flags,
orig_bytes, flush, "preempt");
queue_work(system_dfl_wq,
@@ -1855,14 +1880,12 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
if (!ret || !can_ticket(flush))
return ret;
- return handle_reserve_ticket(fs_info, space_info, &ticket, start_ns,
- orig_bytes, flush);
+ return handle_reserve_ticket(space_info, &ticket, start_ns, orig_bytes, flush);
}
/*
* Try to reserve metadata bytes from the block_rsv's space.
*
- * @fs_info: the filesystem
* @space_info: the space_info we're allocating for
* @orig_bytes: number of bytes we want
* @flush: whether or not we can flush to make our reservation
@@ -1874,20 +1897,21 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
-int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+int btrfs_reserve_metadata_bytes(struct btrfs_space_info *space_info,
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush)
{
int ret;
- ret = __reserve_bytes(fs_info, space_info, orig_bytes, flush);
+ ret = reserve_bytes(space_info, orig_bytes, flush);
if (ret == -ENOSPC) {
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
+
trace_btrfs_space_reservation(fs_info, "space_info:enospc",
space_info->flags, orig_bytes, 1);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- btrfs_dump_space_info(fs_info, space_info, orig_bytes, false);
+ btrfs_dump_space_info(space_info, orig_bytes, false);
}
return ret;
}
@@ -1895,7 +1919,7 @@ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
/*
* Try to reserve data bytes for an allocation.
*
- * @fs_info: the filesystem
+ * @space_info: the space_info we're allocating for
* @bytes: number of bytes we need
* @flush: how we are allowed to flush
*
@@ -1910,15 +1934,17 @@ int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes,
ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA ||
flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE ||
- flush == BTRFS_RESERVE_NO_FLUSH);
- ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA);
+ flush == BTRFS_RESERVE_NO_FLUSH, "flush=%d", flush);
+ ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA,
+ "current->journal_info=0x%lx flush=%d",
+ (unsigned long)current->journal_info, flush);
- ret = __reserve_bytes(fs_info, space_info, bytes, flush);
+ ret = reserve_bytes(space_info, bytes, flush);
if (ret == -ENOSPC) {
trace_btrfs_space_reservation(fs_info, "space_info:enospc",
space_info->flags, bytes, 1);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- btrfs_dump_space_info(fs_info, space_info, bytes, false);
+ btrfs_dump_space_info(space_info, bytes, false);
}
return ret;
}
@@ -1931,7 +1957,7 @@ __cold void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info)
btrfs_info(fs_info, "dumping space info:");
list_for_each_entry(space_info, &fs_info->space_info, list) {
spin_lock(&space_info->lock);
- __btrfs_dump_space_info(fs_info, space_info);
+ __btrfs_dump_space_info(space_info);
spin_unlock(&space_info->lock);
}
dump_global_block_rsv(fs_info);
@@ -1948,7 +1974,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
int factor;
/* It's df, we don't care if it's racy */
- if (list_empty(&sinfo->ro_bgs))
+ if (data_race(list_empty(&sinfo->ro_bgs)))
return 0;
spin_lock(&sinfo->lock);
@@ -2187,7 +2213,7 @@ void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len)
global_rsv->reserved += to_add;
btrfs_space_info_update_bytes_may_use(space_info, to_add);
if (global_rsv->reserved >= global_rsv->size)
- global_rsv->full = 1;
+ global_rsv->full = true;
len -= to_add;
}
spin_unlock(&global_rsv->lock);
@@ -2195,5 +2221,5 @@ void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len)
grant:
/* Add to any tickets we may have. */
if (len)
- btrfs_try_granting_tickets(fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
}
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 679f22efb407..446c0614ad4a 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -142,11 +142,11 @@ struct btrfs_space_info {
flushing. The value is >> clamp, so turns
out to be a 2^clamp divisor. */
- unsigned int full:1; /* indicates that we cannot allocate any more
+ bool full; /* indicates that we cannot allocate any more
chunks for this space */
- unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
+ bool chunk_alloc; /* set if we are allocating a chunk */
- unsigned int flush:1; /* set if we are trying to make space */
+ bool flush; /* set if we are trying to make space */
unsigned int force_alloc; /* set if we need to force a chunk
alloc for this space */
@@ -224,14 +224,6 @@ struct btrfs_space_info {
s64 reclaimable_bytes;
};
-struct reserve_ticket {
- u64 bytes;
- int error;
- bool steal;
- struct list_head list;
- wait_queue_head_t wait;
-};
-
static inline bool btrfs_mixed_space_info(const struct btrfs_space_info *space_info)
{
return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
@@ -266,6 +258,17 @@ DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info");
DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable");
+static inline u64 btrfs_space_info_used(const struct btrfs_space_info *s_info,
+ bool may_use_included)
+{
+ lockdep_assert_held(&s_info->lock);
+
+ return s_info->bytes_used + s_info->bytes_reserved +
+ s_info->bytes_pinned + s_info->bytes_readonly +
+ s_info->bytes_zone_unusable +
+ (may_use_included ? s_info->bytes_may_use : 0);
+}
+
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
struct btrfs_block_group *block_group);
@@ -273,21 +276,15 @@ void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
u64 chunk_size);
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
u64 flags);
-u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info,
- bool may_use_included);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
-void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *info, u64 bytes,
+void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
bool dump_block_groups);
-int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+int btrfs_reserve_metadata_bytes(struct btrfs_space_info *space_info,
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush);
-void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info);
-int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *space_info, u64 bytes,
- enum btrfs_reserve_flush_enum flush);
+void btrfs_try_granting_tickets(struct btrfs_space_info *space_info);
+bool btrfs_can_overcommit(const struct btrfs_space_info *space_info, u64 bytes,
+ enum btrfs_reserve_flush_enum flush);
static inline void btrfs_space_info_free_bytes_may_use(
struct btrfs_space_info *space_info,
@@ -295,7 +292,7 @@ static inline void btrfs_space_info_free_bytes_may_use(
{
spin_lock(&space_info->lock);
btrfs_space_info_update_bytes_may_use(space_info, -num_bytes);
- btrfs_try_granting_tickets(space_info->fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}
int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes,
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 5ca8d4db6722..f82e71f5d88b 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -180,13 +180,14 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
/* Basic checks */
ASSERT(folio_test_private(folio) && folio_get_private(folio));
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
- IS_ALIGNED(len, fs_info->sectorsize));
+ IS_ALIGNED(len, fs_info->sectorsize), "start=%llu len=%u", start, len);
/*
* The range check only works for mapped page, we can still have
* unmapped page like dummy extent buffer pages.
*/
if (folio->mapping)
- ASSERT(folio_pos(folio) <= start && start + len <= folio_end(folio),
+ ASSERT(folio_pos(folio) <= start &&
+ start + len <= folio_next_pos(folio),
"start=%llu len=%u folio_pos=%llu folio_size=%zu",
start, len, folio_pos(folio), folio_size(folio));
}
@@ -194,12 +195,11 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
#define subpage_calc_start_bit(fs_info, folio, name, start, len) \
({ \
unsigned int __start_bit; \
- const unsigned int blocks_per_folio = \
- btrfs_blocks_per_folio(fs_info, folio); \
+ const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \
\
btrfs_subpage_assert(fs_info, folio, start, len); \
__start_bit = offset_in_folio(folio, start) >> fs_info->sectorsize_bits; \
- __start_bit += blocks_per_folio * btrfs_bitmap_nr_##name; \
+ __start_bit += __bpf * btrfs_bitmap_nr_##name; \
__start_bit; \
})
@@ -217,7 +217,7 @@ static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
if (folio_pos(folio) >= orig_start + orig_len)
*len = 0;
else
- *len = min_t(u64, folio_end(folio), orig_start + orig_len) - *start;
+ *len = min_t(u64, folio_next_pos(folio), orig_start + orig_len) - *start;
}
static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
@@ -250,7 +250,9 @@ static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
clear_bit(bit, bfs->bitmaps);
cleared++;
}
- ASSERT(atomic_read(&bfs->nr_locked) >= cleared);
+ ASSERT(atomic_read(&bfs->nr_locked) >= cleared,
+ "atomic_read(&bfs->nr_locked)=%d cleared=%d",
+ atomic_read(&bfs->nr_locked), cleared);
last = atomic_sub_and_test(cleared, &bfs->nr_locked);
spin_unlock_irqrestore(&bfs->lock, flags);
return last;
@@ -329,7 +331,9 @@ void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
if (test_and_clear_bit(bit + start_bit, bfs->bitmaps))
cleared++;
}
- ASSERT(atomic_read(&bfs->nr_locked) >= cleared);
+ ASSERT(atomic_read(&bfs->nr_locked) >= cleared,
+ "atomic_read(&bfs->nr_locked)=%d cleared=%d",
+ atomic_read(&bfs->nr_locked), cleared);
last = atomic_sub_and_test(cleared, &bfs->nr_locked);
spin_unlock_irqrestore(&bfs->lock, flags);
if (last)
@@ -338,24 +342,20 @@ void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
#define subpage_test_bitmap_all_set(fs_info, folio, name) \
({ \
- struct btrfs_folio_state *bfs = folio_get_private(folio); \
- const unsigned int blocks_per_folio = \
- btrfs_blocks_per_folio(fs_info, folio); \
+ struct btrfs_folio_state *__bfs = folio_get_private(folio); \
+ const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \
\
- bitmap_test_range_all_set(bfs->bitmaps, \
- blocks_per_folio * btrfs_bitmap_nr_##name, \
- blocks_per_folio); \
+ bitmap_test_range_all_set(__bfs->bitmaps, \
+ __bpf * btrfs_bitmap_nr_##name, __bpf); \
})
#define subpage_test_bitmap_all_zero(fs_info, folio, name) \
({ \
- struct btrfs_folio_state *bfs = folio_get_private(folio); \
- const unsigned int blocks_per_folio = \
- btrfs_blocks_per_folio(fs_info, folio); \
+ struct btrfs_folio_state *__bfs = folio_get_private(folio); \
+ const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \
\
- bitmap_test_range_all_zero(bfs->bitmaps, \
- blocks_per_folio * btrfs_bitmap_nr_##name, \
- blocks_per_folio); \
+ bitmap_test_range_all_zero(__bfs->bitmaps, \
+ __bpf * btrfs_bitmap_nr_##name, __bpf); \
})
void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
@@ -445,6 +445,7 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
writeback, start, len);
unsigned long flags;
+ bool keep_write;
spin_lock_irqsave(&bfs->lock, flags);
bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
@@ -455,18 +456,9 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
* assume writeback is complete, and exit too early — violating sync
* ordering guarantees.
*/
+ keep_write = folio_test_dirty(folio);
if (!folio_test_writeback(folio))
- __folio_start_writeback(folio, true);
- if (!folio_test_dirty(folio)) {
- struct address_space *mapping = folio_mapping(folio);
- XA_STATE(xas, &mapping->i_pages, folio->index);
- unsigned long flags;
-
- xas_lock_irqsave(&xas, flags);
- xas_load(&xas);
- xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
- xas_unlock_irqrestore(&xas, flags);
- }
+ __folio_start_writeback(folio, keep_write);
spin_unlock_irqrestore(&bfs->lock, flags);
}
@@ -672,27 +664,23 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
#define GET_SUBPAGE_BITMAP(fs_info, folio, name, dst) \
{ \
- const unsigned int blocks_per_folio = \
- btrfs_blocks_per_folio(fs_info, folio); \
- const struct btrfs_folio_state *bfs = folio_get_private(folio); \
+ const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \
+ const struct btrfs_folio_state *__bfs = folio_get_private(folio); \
\
- ASSERT(blocks_per_folio <= BITS_PER_LONG); \
- *dst = bitmap_read(bfs->bitmaps, \
- blocks_per_folio * btrfs_bitmap_nr_##name, \
- blocks_per_folio); \
+ ASSERT(__bpf <= BITS_PER_LONG); \
+ *dst = bitmap_read(__bfs->bitmaps, \
+ __bpf * btrfs_bitmap_nr_##name, __bpf); \
}
#define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len) \
{ \
unsigned long bitmap; \
- const unsigned int blocks_per_folio = \
- btrfs_blocks_per_folio(fs_info, folio); \
+ const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \
\
GET_SUBPAGE_BITMAP(fs_info, folio, name, &bitmap); \
btrfs_warn(fs_info, \
"dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
- start, len, folio_pos(folio), \
- blocks_per_folio, &bitmap); \
+ start, len, folio_pos(folio), __bpf, &bitmap); \
}
/*
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index ad0552db7c7d..d81a0ade559f 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -7,7 +7,6 @@
#include <linux/atomic.h>
#include <linux/sizes.h>
#include "btrfs_inode.h"
-#include "fs.h"
struct address_space;
struct folio;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 430e7419349c..1999533b52be 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -807,17 +807,15 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
struct btrfs_root_ref *root_ref;
struct btrfs_inode_ref *inode_ref;
struct btrfs_key key;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
char *name = NULL, *ptr;
u64 dirid;
int len;
int ret;
path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto err;
- }
+ if (!path)
+ return ERR_PTR(-ENOMEM);
name = kmalloc(PATH_MAX, GFP_KERNEL);
if (!name) {
@@ -905,7 +903,6 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
fs_root = NULL;
}
- btrfs_free_path(path);
if (ptr == name + PATH_MAX - 1) {
name[0] = '/';
name[1] = '\0';
@@ -916,7 +913,6 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
err:
btrfs_put_root(fs_root);
- btrfs_free_path(path);
kfree(name);
return ERR_PTR(ret);
}
@@ -1614,7 +1610,7 @@ static inline void btrfs_descending_sort_devices(
static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
u64 *free_bytes)
{
- struct btrfs_device_info *devices_info;
+ struct btrfs_device_info AUTO_KFREE(devices_info);
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
u64 type;
@@ -1712,7 +1708,6 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
nr_devices--;
}
- kfree(devices_info);
*free_bytes = avail_space;
return 0;
}
@@ -2430,6 +2425,66 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont
return 0;
}
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+static int btrfs_remove_bdev(struct super_block *sb, struct block_device *bdev)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ struct btrfs_device *device;
+ struct btrfs_dev_lookup_args lookup_args = { .devt = bdev->bd_dev };
+ bool can_rw;
+
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ device = btrfs_find_device(fs_info->fs_devices, &lookup_args);
+ if (!device) {
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ /* Device not found, should not affect the running fs, just give a warning. */
+ btrfs_warn(fs_info, "unable to find btrfs device for block device '%pg'", bdev);
+ return 0;
+ }
+ /*
+ * The to-be-removed device is already missing?
+ *
+ * That's weird but no special handling needed and can exit right now.
+ */
+ if (unlikely(test_and_set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))) {
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ btrfs_warn(fs_info, "btrfs device id %llu is already missing", device->devid);
+ return 0;
+ }
+
+ device->fs_devices->missing_devices++;
+ if (test_and_clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
+ list_del_init(&device->dev_alloc_list);
+ WARN_ON(device->fs_devices->rw_devices < 1);
+ device->fs_devices->rw_devices--;
+ }
+ can_rw = btrfs_check_rw_degradable(fs_info, device);
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ /*
+ * Now device is considered missing, btrfs_device_name() won't give a
+ * meaningful result anymore, so only output the devid.
+ */
+ if (unlikely(!can_rw)) {
+ btrfs_crit(fs_info,
+ "btrfs device id %llu has gone missing, can not maintain read-write",
+ device->devid);
+ return -EIO;
+ }
+ btrfs_warn(fs_info,
+ "btrfs device id %llu has gone missing, continue as degraded",
+ device->devid);
+ btrfs_set_opt(fs_info->mount_opt, DEGRADED);
+ return 0;
+}
+
+static void btrfs_shutdown(struct super_block *sb)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+
+ btrfs_force_shutdown(fs_info);
+}
+#endif
+
static const struct super_operations btrfs_super_ops = {
.drop_inode = btrfs_drop_inode,
.evict_inode = btrfs_evict_inode,
@@ -2445,6 +2500,10 @@ static const struct super_operations btrfs_super_ops = {
.unfreeze_fs = btrfs_unfreeze,
.nr_cached_objects = btrfs_nr_cached_objects,
.free_cached_objects = btrfs_free_cached_objects,
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ .remove_bdev = btrfs_remove_bdev,
+ .shutdown = btrfs_shutdown,
+#endif
};
static const struct file_operations btrfs_ctl_fops = {
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 81f52c1f55ce..1f64c132b387 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -10,6 +10,7 @@
#include <linux/completion.h>
#include <linux/bug.h>
#include <linux/list.h>
+#include <linux/string_choices.h>
#include <crypto/hash.h>
#include "messages.h"
#include "ctree.h"
@@ -25,6 +26,7 @@
#include "misc.h"
#include "fs.h"
#include "accessors.h"
+#include "zoned.h"
/*
* Structure name Path
@@ -1187,6 +1189,56 @@ static ssize_t btrfs_commit_stats_store(struct kobject *kobj,
}
BTRFS_ATTR_RW(, commit_stats, btrfs_commit_stats_show, btrfs_commit_stats_store);
+static ssize_t btrfs_zoned_stats_show(struct kobject *kobj,
+ struct kobj_attribute *a, char *buf)
+{
+ struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ struct btrfs_block_group *bg;
+ size_t ret = 0;
+
+
+ if (!btrfs_is_zoned(fs_info))
+ return ret;
+
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ ret += sysfs_emit_at(buf, ret, "active block-groups: %zu\n",
+ list_count_nodes(&fs_info->zone_active_bgs));
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+
+ mutex_lock(&fs_info->reclaim_bgs_lock);
+ spin_lock(&fs_info->unused_bgs_lock);
+ ret += sysfs_emit_at(buf, ret, "\treclaimable: %zu\n",
+ list_count_nodes(&fs_info->reclaim_bgs));
+ ret += sysfs_emit_at(buf, ret, "\tunused: %zu\n",
+ list_count_nodes(&fs_info->unused_bgs));
+ spin_unlock(&fs_info->unused_bgs_lock);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
+
+ ret += sysfs_emit_at(buf, ret, "\tneed reclaim: %s\n",
+ str_true_false(btrfs_zoned_should_reclaim(fs_info)));
+
+ if (fs_info->data_reloc_bg)
+ ret += sysfs_emit_at(buf, ret,
+ "data relocation block-group: %llu\n",
+ fs_info->data_reloc_bg);
+ if (fs_info->treelog_bg)
+ ret += sysfs_emit_at(buf, ret,
+ "tree-log block-group: %llu\n",
+ fs_info->treelog_bg);
+
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ ret += sysfs_emit_at(buf, ret, "active zones:\n");
+ list_for_each_entry(bg, &fs_info->zone_active_bgs, active_bg_list) {
+ ret += sysfs_emit_at(buf, ret,
+ "\tstart: %llu, wp: %llu used: %llu, reserved: %llu, unusable: %llu\n",
+ bg->start, bg->alloc_offset, bg->used,
+ bg->reserved, bg->zone_unusable);
+ }
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+ return ret;
+}
+BTRFS_ATTR(, zoned_stats, btrfs_zoned_stats_show);
+
static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
@@ -1599,6 +1651,7 @@ static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, bg_reclaim_threshold),
BTRFS_ATTR_PTR(, commit_stats),
BTRFS_ATTR_PTR(, temp_fsid),
+ BTRFS_ATTR_PTR(, zoned_stats),
#ifdef CONFIG_BTRFS_EXPERIMENTAL
BTRFS_ATTR_PTR(, offload_csum),
#endif
@@ -1981,13 +2034,12 @@ static const char *alloc_name(struct btrfs_space_info *space_info)
* Create a sysfs entry for a space info type at path
* /sys/fs/btrfs/UUID/allocation/TYPE
*/
-int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
+int btrfs_sysfs_add_space_info_type(struct btrfs_space_info *space_info)
{
int ret;
ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
- fs_info->space_info_kobj, "%s",
+ space_info->fs_info->space_info_kobj, "%s",
alloc_name(space_info));
if (ret) {
kobject_put(&space_info->kobj);
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 0f94ae923210..05498e5346c3 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -37,8 +37,7 @@ void __cold btrfs_exit_sysfs(void);
int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info);
void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
void btrfs_sysfs_add_block_group_type(struct btrfs_block_group *cache);
-int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info);
+int btrfs_sysfs_add_space_info_type(struct btrfs_space_info *space_info);
void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info);
void btrfs_sysfs_update_devid(struct btrfs_device *device);
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index b19328d077d3..a0187d6163df 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -505,7 +505,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb)
static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
{
struct btrfs_fs_info *fs_info;
- unsigned long *bitmap = NULL;
+ unsigned long AUTO_KFREE(bitmap);
struct extent_buffer *eb = NULL;
int ret;
@@ -551,7 +551,6 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
ret = __test_eb_bitmaps(bitmap, eb);
out:
free_extent_buffer(eb);
- kfree(bitmap);
btrfs_free_dummy_fs_info(fs_info);
return ret;
}
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 42af6c737c6e..0b9f25dd1a68 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -1013,7 +1013,7 @@ static int test_rmap_block(struct btrfs_fs_info *fs_info,
struct rmap_test_vector *test)
{
struct btrfs_chunk_map *map;
- u64 *logical = NULL;
+ u64 AUTO_KFREE(logical);
int i, out_ndaddrs, out_stripe_len;
int ret;
@@ -1046,7 +1046,7 @@ static int test_rmap_block(struct btrfs_fs_info *fs_info,
if (ret) {
test_err("error adding chunk map to mapping tree");
btrfs_free_chunk_map(map);
- goto out_free;
+ return ret;
}
ret = btrfs_rmap_block(fs_info, map->start, btrfs_sb_offset(1),
@@ -1079,8 +1079,6 @@ static int test_rmap_block(struct btrfs_fs_info *fs_info,
ret = 0;
out:
btrfs_remove_chunk_map(fs_info, map);
-out_free:
- kfree(logical);
return ret;
}
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 3fc8dc3fd980..05cfda8af422 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -20,7 +20,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
struct btrfs_extent_item *item;
struct btrfs_extent_inline_ref *iref;
struct btrfs_tree_block_info *block_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_key ins;
u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info);
@@ -41,7 +41,6 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
ret = btrfs_insert_empty_item(&trans, root, path, &ins, size);
if (ret) {
test_err("couldn't insert ref %d", ret);
- btrfs_free_path(path);
return ret;
}
@@ -61,7 +60,6 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
}
- btrfs_free_path(path);
return 0;
}
@@ -70,7 +68,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
{
struct btrfs_trans_handle trans;
struct btrfs_extent_item *item;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
u64 refs;
int ret;
@@ -90,7 +88,6 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
if (ret) {
test_err("couldn't find extent ref");
- btrfs_free_path(path);
return ret;
}
@@ -112,7 +109,6 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
ret = btrfs_insert_empty_item(&trans, root, path, &key, 0);
if (ret)
test_err("failed to insert backref");
- btrfs_free_path(path);
return ret;
}
@@ -121,7 +117,7 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
{
struct btrfs_trans_handle trans;
struct btrfs_key key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int ret;
btrfs_init_dummy_trans(&trans, NULL);
@@ -139,11 +135,9 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
if (ret) {
test_err("didn't find our key %d", ret);
- btrfs_free_path(path);
return ret;
}
btrfs_del_item(&trans, root, path);
- btrfs_free_path(path);
return 0;
}
@@ -152,7 +146,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
{
struct btrfs_trans_handle trans;
struct btrfs_extent_item *item;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
u64 refs;
int ret;
@@ -172,7 +166,6 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
if (ret) {
test_err("couldn't find extent ref");
- btrfs_free_path(path);
return ret;
}
@@ -198,7 +191,6 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
return ret;
}
btrfs_del_item(&trans, root, path);
- btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 89ae0c7a610a..05ee4391c83a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,6 +32,8 @@
#include "ioctl.h"
#include "relocation.h"
#include "scrub.h"
+#include "ordered-data.h"
+#include "delayed-inode.h"
static struct kmem_cache *btrfs_trans_handle_cachep;
@@ -138,7 +140,6 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
void btrfs_put_transaction(struct btrfs_transaction *transaction)
{
- WARN_ON(refcount_read(&transaction->use_count) == 0);
if (refcount_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(!xa_empty(&transaction->delayed_refs.head_refs));
@@ -185,7 +186,8 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
* At this point no one can be using this transaction to modify any tree
* and no one can start another transaction to modify any tree either.
*/
- ASSERT(cur_trans->state == TRANS_STATE_COMMIT_DOING);
+ ASSERT(cur_trans->state == TRANS_STATE_COMMIT_DOING,
+ "cur_trans->state=%d", cur_trans->state);
down_write(&fs_info->commit_root_sem);
@@ -575,7 +577,7 @@ static int btrfs_reserve_trans_metadata(struct btrfs_fs_info *fs_info,
* We want to reserve all the bytes we may need all at once, so we only
* do 1 enospc flushing cycle per transaction start.
*/
- ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
+ ret = btrfs_reserve_metadata_bytes(si, bytes, flush);
/*
* If we are an emergency flush, which can steal from the global block
@@ -585,7 +587,7 @@ static int btrfs_reserve_trans_metadata(struct btrfs_fs_info *fs_info,
if (ret && flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
bytes -= *delayed_refs_bytes;
*delayed_refs_bytes = 0;
- ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
+ ret = btrfs_reserve_metadata_bytes(si, bytes, flush);
}
return ret;
@@ -1024,13 +1026,18 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
struct btrfs_fs_info *fs_info = trans->fs_info;
if (!trans->block_rsv) {
- ASSERT(!trans->bytes_reserved);
- ASSERT(!trans->delayed_refs_bytes_reserved);
+ ASSERT(trans->bytes_reserved == 0,
+ "trans->bytes_reserved=%llu", trans->bytes_reserved);
+ ASSERT(trans->delayed_refs_bytes_reserved == 0,
+ "trans->delayed_refs_bytes_reserved=%llu",
+ trans->delayed_refs_bytes_reserved);
return;
}
if (!trans->bytes_reserved) {
- ASSERT(!trans->delayed_refs_bytes_reserved);
+ ASSERT(trans->delayed_refs_bytes_reserved == 0,
+ "trans->delayed_refs_bytes_reserved=%llu",
+ trans->delayed_refs_bytes_reserved);
return;
}
@@ -1229,7 +1236,8 @@ int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark)
bool errors = false;
int ret;
- ASSERT(btrfs_root_id(log_root) == BTRFS_TREE_LOG_OBJECTID);
+ ASSERT(btrfs_root_id(log_root) == BTRFS_TREE_LOG_OBJECTID,
+ "root_id(log_root)=%llu", btrfs_root_id(log_root));
ret = __btrfs_wait_marked_extents(fs_info, dirty_pages);
if ((mark & EXTENT_DIRTY_LOG1) &&
@@ -1334,7 +1342,8 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
* At this point no one can be using this transaction to modify any tree
* and no one can start another transaction to modify any tree either.
*/
- ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
+ ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING,
+ "trans->transaction->state=%d", trans->transaction->state);
eb = btrfs_lock_root_node(fs_info->tree_root);
ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
@@ -1468,7 +1477,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
* At this point no one can be using this transaction to modify any tree
* and no one can start another transaction to modify any tree either.
*/
- ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
+ ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING,
+ "trans->transaction->state=%d", trans->transaction->state);
spin_lock(&fs_info->fs_roots_radix_lock);
while (1) {
@@ -1486,9 +1496,15 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
* At this point we can neither have tasks logging inodes
* from a root nor trying to commit a log tree.
*/
- ASSERT(atomic_read(&root->log_writers) == 0);
- ASSERT(atomic_read(&root->log_commit[0]) == 0);
- ASSERT(atomic_read(&root->log_commit[1]) == 0);
+ ASSERT(atomic_read(&root->log_writers) == 0,
+ "atomic_read(&root->log_writers)=%d",
+ atomic_read(&root->log_writers));
+ ASSERT(atomic_read(&root->log_commit[0]) == 0,
+ "atomic_read(&root->log_commit[0])=%d",
+ atomic_read(&root->log_commit[0]));
+ ASSERT(atomic_read(&root->log_commit[1]) == 0,
+ "atomic_read(&root->log_commit[1])=%d",
+ atomic_read(&root->log_commit[1]));
radix_tree_tag_clear(&fs_info->fs_roots_radix,
(unsigned long)btrfs_root_id(root),
@@ -2157,7 +2173,8 @@ static void add_pending_snapshot(struct btrfs_trans_handle *trans)
return;
lockdep_assert_held(&trans->fs_info->trans_lock);
- ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_PREP);
+ ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_PREP,
+ "cur_trans->state=%d", cur_trans->state);
list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
}
@@ -2184,7 +2201,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
struct btrfs_transaction *prev_trans = NULL;
int ret;
- ASSERT(refcount_read(&trans->use_count) == 1);
+ ASSERT(refcount_read(&trans->use_count) == 1,
+ "refcount_read(&trans->use_count)=%d", refcount_read(&trans->use_count));
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 9f7c777af635..18ef069197e5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -14,10 +14,6 @@
#include <linux/wait.h>
#include "btrfs_inode.h"
#include "delayed-ref.h"
-#include "extent-io-tree.h"
-#include "block-rsv.h"
-#include "messages.h"
-#include "misc.h"
struct dentry;
struct inode;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index c10b4c242acf..c21c21adf61e 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -186,7 +186,7 @@ static bool check_prev_ino(struct extent_buffer *leaf,
key->type == BTRFS_INODE_EXTREF_KEY ||
key->type == BTRFS_DIR_INDEX_KEY ||
key->type == BTRFS_DIR_ITEM_KEY ||
- key->type == BTRFS_EXTENT_DATA_KEY);
+ key->type == BTRFS_EXTENT_DATA_KEY, "key->type=%u", key->type);
/*
* Only subvolume trees along with their reloc trees need this check.
@@ -1618,10 +1618,9 @@ static int check_extent_item(struct extent_buffer *leaf,
if (unlikely(prev_end > key->objectid)) {
extent_err(leaf, slot,
- "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]",
- prev_key->objectid, prev_key->type,
- prev_key->offset, key->objectid, key->type,
- key->offset);
+ "previous extent " BTRFS_KEY_FMT " overlaps current extent " BTRFS_KEY_FMT,
+ BTRFS_KEY_FMT_VALUE(prev_key),
+ BTRFS_KEY_FMT_VALUE(key));
return -EUCLEAN;
}
}
@@ -2060,10 +2059,9 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
/* Make sure the keys are in the right order */
if (unlikely(btrfs_comp_cpu_keys(&prev_key, &key) >= 0)) {
generic_err(leaf, slot,
- "bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
- prev_key.objectid, prev_key.type,
- prev_key.offset, key.objectid, key.type,
- key.offset);
+ "bad key order, prev " BTRFS_KEY_FMT " current " BTRFS_KEY_FMT,
+ BTRFS_KEY_FMT_VALUE(&prev_key),
+ BTRFS_KEY_FMT_VALUE(&key));
return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
}
@@ -2181,10 +2179,9 @@ enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node)
if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) {
generic_err(node, slot,
- "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
- key.objectid, key.type, key.offset,
- next_key.objectid, next_key.type,
- next_key.offset);
+ "bad key order, current " BTRFS_KEY_FMT " next " BTRFS_KEY_FMT,
+ BTRFS_KEY_FMT_VALUE(&key),
+ BTRFS_KEY_FMT_VALUE(&next_key));
return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
}
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 30f3c3b849c1..fff37c8d96a4 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -29,6 +29,7 @@
#include "orphan.h"
#include "print-tree.h"
#include "tree-checker.h"
+#include "delayed-inode.h"
#define MAX_CONFLICT_INODES 10
@@ -198,9 +199,9 @@ static void do_abort_log_replay(struct walk_control *wc, const char *function,
if (wc->log_leaf) {
btrfs_crit(fs_info,
- "log tree (for root %llu) leaf currently being processed (slot %d key %llu %u %llu):",
+"log tree (for root %llu) leaf currently being processed (slot %d key " BTRFS_KEY_FMT "):",
btrfs_root_id(wc->root), wc->log_slot,
- wc->log_key.objectid, wc->log_key.type, wc->log_key.offset);
+ BTRFS_KEY_FMT_VALUE(&wc->log_key));
btrfs_print_leaf(wc->log_leaf);
}
@@ -262,7 +263,7 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r
struct btrfs_inode *inode;
/* Only meant to be called for subvolume roots and not for log roots. */
- ASSERT(btrfs_is_fstree(btrfs_root_id(root)));
+ ASSERT(btrfs_is_fstree(btrfs_root_id(root)), "root_id=%llu", btrfs_root_id(root));
/*
* We're holding a transaction handle whether we are logging or
@@ -501,7 +502,7 @@ static int overwrite_item(struct walk_control *wc)
* the leaf before writing into the log tree. See the comments at
* copy_items() for more details.
*/
- ASSERT(btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID);
+ ASSERT(btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID, "root_id=%llu", btrfs_root_id(root));
item_size = btrfs_item_size(wc->log_leaf, wc->log_slot);
src_ptr = btrfs_item_ptr_offset(wc->log_leaf, wc->log_slot);
@@ -510,9 +511,9 @@ static int overwrite_item(struct walk_control *wc)
ret = btrfs_search_slot(NULL, root, &wc->log_key, wc->subvol_path, 0, 0);
if (ret < 0) {
btrfs_abort_log_replay(wc, ret,
- "failed to search subvolume tree for key (%llu %u %llu) root %llu",
- wc->log_key.objectid, wc->log_key.type,
- wc->log_key.offset, btrfs_root_id(root));
+ "failed to search subvolume tree for key " BTRFS_KEY_FMT " root %llu",
+ BTRFS_KEY_FMT_VALUE(&wc->log_key),
+ btrfs_root_id(root));
return ret;
}
@@ -601,9 +602,9 @@ static int overwrite_item(struct walk_control *wc)
insert:
btrfs_release_path(wc->subvol_path);
/* try to insert the key into the destination tree */
- wc->subvol_path->skip_release_on_error = 1;
+ wc->subvol_path->skip_release_on_error = true;
ret = btrfs_insert_empty_item(trans, root, wc->subvol_path, &wc->log_key, item_size);
- wc->subvol_path->skip_release_on_error = 0;
+ wc->subvol_path->skip_release_on_error = false;
dst_eb = wc->subvol_path->nodes[0];
dst_slot = wc->subvol_path->slots[0];
@@ -618,9 +619,8 @@ insert:
btrfs_extend_item(trans, wc->subvol_path, item_size - found_size);
} else if (ret) {
btrfs_abort_log_replay(wc, ret,
- "failed to insert item for key (%llu %u %llu)",
- wc->log_key.objectid, wc->log_key.type,
- wc->log_key.offset);
+ "failed to insert item for key " BTRFS_KEY_FMT,
+ BTRFS_KEY_FMT_VALUE(&wc->log_key));
return ret;
}
dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot);
@@ -829,9 +829,9 @@ static noinline int replay_one_extent(struct walk_control *wc)
&wc->log_key, sizeof(*item));
if (ret) {
btrfs_abort_log_replay(wc, ret,
- "failed to insert item with key (%llu %u %llu) root %llu",
- wc->log_key.objectid, wc->log_key.type,
- wc->log_key.offset, btrfs_root_id(root));
+ "failed to insert item with key " BTRFS_KEY_FMT " root %llu",
+ BTRFS_KEY_FMT_VALUE(&wc->log_key),
+ btrfs_root_id(root));
goto out;
}
dest_offset = btrfs_item_ptr_offset(wc->subvol_path->nodes[0],
@@ -1348,9 +1348,9 @@ again:
ret = btrfs_search_slot(NULL, root, &search_key, wc->subvol_path, 0, 0);
if (ret < 0) {
btrfs_abort_log_replay(wc, ret,
- "failed to search subvolume tree for key (%llu %u %llu) root %llu",
- search_key.objectid, search_key.type,
- search_key.offset, btrfs_root_id(root));
+ "failed to search subvolume tree for key " BTRFS_KEY_FMT " root %llu",
+ BTRFS_KEY_FMT_VALUE(&search_key),
+ btrfs_root_id(root));
return ret;
} else if (ret == 0) {
/*
@@ -1483,9 +1483,9 @@ again:
}
if (ret < 0) {
btrfs_abort_log_replay(wc, ret,
- "failed to search subvolume tree for key (%llu %u %llu) root %llu",
- wc->log_key.objectid, wc->log_key.type,
- wc->log_key.offset, btrfs_root_id(root));
+ "failed to search subvolume tree for key " BTRFS_KEY_FMT " root %llu",
+ BTRFS_KEY_FMT_VALUE(&wc->log_key),
+ btrfs_root_id(root));
goto out;
}
@@ -2282,7 +2282,8 @@ static noinline int replay_one_dir_item(struct walk_control *wc)
struct btrfs_dir_item *di;
/* We only log dir index keys, which only contain a single dir item. */
- ASSERT(wc->log_key.type == BTRFS_DIR_INDEX_KEY);
+ ASSERT(wc->log_key.type == BTRFS_DIR_INDEX_KEY,
+ "wc->log_key.type=%u", wc->log_key.type);
di = btrfs_item_ptr(wc->log_leaf, wc->log_slot, struct btrfs_dir_item);
ret = replay_one_name(wc, di);
@@ -2434,7 +2435,7 @@ static noinline int check_item_in_log(struct walk_control *wc,
* we need to do is process the dir index keys, we (and our caller) can
* safely ignore dir item keys (key type BTRFS_DIR_ITEM_KEY).
*/
- ASSERT(dir_key->type == BTRFS_DIR_INDEX_KEY);
+ ASSERT(dir_key->type == BTRFS_DIR_INDEX_KEY, "dir_key->type=%u", dir_key->type);
eb = wc->subvol_path->nodes[0];
slot = wc->subvol_path->slots[0];
@@ -2647,7 +2648,7 @@ static noinline int replay_dir_deletes(struct walk_control *wc,
int ret = 0;
struct btrfs_key dir_key;
struct btrfs_key found_key;
- struct btrfs_path *log_path;
+ BTRFS_PATH_AUTO_FREE(log_path);
struct btrfs_inode *dir;
dir_key.objectid = dirid;
@@ -2664,7 +2665,6 @@ static noinline int replay_dir_deletes(struct walk_control *wc,
* we replay the deletes before we copy in the inode item from the log.
*/
if (IS_ERR(dir)) {
- btrfs_free_path(log_path);
ret = PTR_ERR(dir);
if (ret == -ENOENT)
ret = 0;
@@ -2700,10 +2700,9 @@ static noinline int replay_dir_deletes(struct walk_control *wc,
wc->subvol_path, 0, 0);
if (ret < 0) {
btrfs_abort_log_replay(wc, ret,
- "failed to search root %llu for key (%llu %u %llu)",
+ "failed to search root %llu for key " BTRFS_KEY_FMT,
btrfs_root_id(root),
- dir_key.objectid, dir_key.type,
- dir_key.offset);
+ BTRFS_KEY_FMT_VALUE(&dir_key));
goto out;
}
@@ -2745,7 +2744,6 @@ static noinline int replay_dir_deletes(struct walk_control *wc,
ret = 0;
out:
btrfs_release_path(wc->subvol_path);
- btrfs_free_path(log_path);
iput(&dir->vfs_inode);
return ret;
}
@@ -3340,7 +3338,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&root->log_mutex);
return ctx->log_ret;
}
- ASSERT(log_transid == root->log_transid);
+ ASSERT(log_transid == root->log_transid,
+ "log_transid=%d root->log_transid=%d", log_transid, root->log_transid);
atomic_set(&root->log_commit[index1], 1);
/* wait for previous tree log sync to complete */
@@ -3480,7 +3479,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = root_log_ctx.log_ret;
goto out;
}
- ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid);
+ ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid,
+ "root_log_ctx.log_transid=%d log_root_tree->log_transid=%d",
+ root_log_ctx.log_transid, log_root_tree->log_transid);
atomic_set(&log_root_tree->log_commit[index2], 1);
if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
@@ -3584,7 +3585,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* someone else already started it. We use <= and not < because the
* first log transaction has an ID of 0.
*/
- ASSERT(btrfs_get_root_last_log_commit(root) <= log_transid);
+ ASSERT(btrfs_get_root_last_log_commit(root) <= log_transid,
+ "last_log_commit(root)=%d log_transid=%d",
+ btrfs_get_root_last_log_commit(root), log_transid);
btrfs_set_root_last_log_commit(root, log_transid);
out_wake_log_root:
@@ -3895,10 +3898,10 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans,
* or the entire directory.
*/
void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
const struct fscrypt_str *name,
struct btrfs_inode *dir, u64 index)
{
+ struct btrfs_root *root = dir->root;
BTRFS_PATH_AUTO_FREE(path);
int ret;
@@ -3933,11 +3936,11 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
/* see comments for btrfs_del_dir_entries_in_log */
void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
const struct fscrypt_str *name,
- struct btrfs_inode *inode, u64 dirid)
+ struct btrfs_inode *inode,
+ struct btrfs_inode *dir)
{
- struct btrfs_root *log;
+ struct btrfs_root *root = dir->root;
int ret;
ret = inode_logged(trans, inode, NULL);
@@ -3952,10 +3955,10 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
ASSERT(ret == 0, "join_running_log_trans() ret=%d", ret);
if (WARN_ON(ret))
return;
- log = root->log_root;
mutex_lock(&inode->log_mutex);
- ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode), dirid, NULL);
+ ret = btrfs_del_inode_ref(trans, root->log_root, name, btrfs_ino(inode),
+ btrfs_ino(dir), NULL);
mutex_unlock(&inode->log_mutex);
if (ret < 0 && ret != -ENOENT)
btrfs_set_log_full_commit(trans);
@@ -4017,7 +4020,7 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
int count)
{
struct btrfs_root *log = inode->root->log_root;
- char *ins_data = NULL;
+ char AUTO_KFREE(ins_data);
struct btrfs_item_batch batch;
struct extent_buffer *dst;
unsigned long src_offset;
@@ -4028,7 +4031,7 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
int ret;
int i;
- ASSERT(count > 0);
+ ASSERT(count > 0, "count=%d", count);
batch.nr = count;
if (count == 1) {
@@ -4062,7 +4065,7 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_items(trans, log, dst_path, &batch);
if (ret)
- goto out;
+ return ret;
dst = dst_path->nodes[0];
/*
@@ -4081,7 +4084,9 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
btrfs_release_path(dst_path);
last_index = batch.keys[count - 1].offset;
- ASSERT(last_index > inode->last_dir_index_offset);
+ ASSERT(last_index > inode->last_dir_index_offset,
+ "last_index=%llu inode->last_dir_index_offset=%llu",
+ last_index, inode->last_dir_index_offset);
/*
* If for some unexpected reason the last item's index is not greater
@@ -4094,8 +4099,6 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
if (btrfs_get_first_dir_index_to_log(inode) == 0)
btrfs_set_first_dir_index_to_log(inode, batch.keys[0].offset);
-out:
- kfree(ins_data);
return ret;
}
@@ -4154,7 +4157,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
for (int i = path->slots[0]; i < nritems; i++) {
struct btrfs_dir_item *di;
struct btrfs_key key;
- int ret;
btrfs_item_key_to_cpu(src, &key, i);
@@ -4224,8 +4226,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
}
if (batch_size > 0) {
- int ret;
-
ret = flush_dir_items_batch(trans, inode, src, dst_path,
batch_start, batch_size);
if (ret < 0)
@@ -4410,7 +4410,9 @@ done:
* change in the current transaction), then we don't need to log
* a range, last_old_dentry_offset is == to last_offset.
*/
- ASSERT(last_old_dentry_offset <= last_offset);
+ ASSERT(last_old_dentry_offset <= last_offset,
+ "last_old_dentry_offset=%llu last_offset=%llu",
+ last_old_dentry_offset, last_offset);
if (last_old_dentry_offset < last_offset)
ret = insert_dir_log_key(trans, log, path, ino,
last_old_dentry_offset + 1,
@@ -4765,7 +4767,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_key *ins_keys;
u32 *ins_sizes;
struct btrfs_item_batch batch;
- char *ins_data;
+ char AUTO_KFREE(ins_data);
int dst_index;
const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
const u64 i_size = i_size_read(&inode->vfs_inode);
@@ -4893,7 +4895,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
disk_bytenr + extent_num_bytes - 1,
&ordered_sums, false);
if (ret < 0)
- goto out;
+ return ret;
ret = 0;
list_for_each_entry_safe(sums, sums_next, &ordered_sums, list) {
@@ -4903,7 +4905,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
kfree(sums);
}
if (ret)
- goto out;
+ return ret;
add_to_batch:
ins_sizes[dst_index] = btrfs_item_size(src, src_slot);
@@ -4917,11 +4919,11 @@ add_to_batch:
* so we don't need to do anything.
*/
if (batch.nr == 0)
- goto out;
+ return 0;
ret = btrfs_insert_empty_items(trans, log, dst_path, &batch);
if (ret)
- goto out;
+ return ret;
dst_index = 0;
for (int i = 0; i < nr; i++) {
@@ -4974,8 +4976,6 @@ copy_item:
}
btrfs_release_path(dst_path);
-out:
- kfree(ins_data);
return ret;
}
@@ -5414,12 +5414,12 @@ process:
set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags);
if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
- spin_lock_irq(&inode->ordered_tree_lock);
+ spin_lock(&inode->ordered_tree_lock);
if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
atomic_inc(&trans->transaction->pending_ordered);
}
- spin_unlock_irq(&inode->ordered_tree_lock);
+ spin_unlock(&inode->ordered_tree_lock);
}
btrfs_put_ordered_extent(ordered);
}
@@ -5694,9 +5694,8 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
struct btrfs_inode *inode,
u64 *other_ino, u64 *other_parent)
{
- int ret;
BTRFS_PATH_AUTO_FREE(search_path);
- char *name = NULL;
+ char AUTO_KFREE(name);
u32 name_len = 0;
u32 item_size = btrfs_item_size(eb, slot);
u32 cur_offset = 0;
@@ -5705,8 +5704,8 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
search_path = btrfs_alloc_path();
if (!search_path)
return -ENOMEM;
- search_path->search_commit_root = 1;
- search_path->skip_locking = 1;
+ search_path->search_commit_root = true;
+ search_path->skip_locking = true;
while (cur_offset < item_size) {
u64 parent;
@@ -5739,10 +5738,8 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
char *new_name;
new_name = krealloc(name, this_name_len, GFP_NOFS);
- if (!new_name) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!new_name)
+ return -ENOMEM;
name_len = this_name_len;
name = new_name;
}
@@ -5760,28 +5757,24 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
di, &di_key);
if (di_key.type == BTRFS_INODE_ITEM_KEY) {
if (di_key.objectid != key->objectid) {
- ret = 1;
*other_ino = di_key.objectid;
*other_parent = parent;
+ return 1;
} else {
- ret = 0;
+ return 0;
}
} else {
- ret = -EAGAIN;
+ return -EAGAIN;
}
- goto out;
} else if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
+ return PTR_ERR(di);
}
btrfs_release_path(search_path);
cur_offset += this_len;
}
- ret = 0;
-out:
- kfree(name);
- return ret;
+
+ return 0;
}
/*
@@ -6031,8 +6024,8 @@ static int conflicting_inode_is_dir(struct btrfs_root *root, u64 ino,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (WARN_ON_ONCE(ret > 0)) {
@@ -6052,8 +6045,8 @@ static int conflicting_inode_is_dir(struct btrfs_root *root, u64 ino,
}
btrfs_release_path(path);
- path->search_commit_root = 0;
- path->skip_locking = 0;
+ path->search_commit_root = false;
+ path->skip_locking = false;
return ret;
}
@@ -6543,7 +6536,7 @@ static int log_delayed_insertion_items(struct btrfs_trans_handle *trans,
curr = list_next_entry(curr, log_list);
}
- ASSERT(batch.nr >= 1);
+ ASSERT(batch.nr >= 1, "batch.nr=%d", batch.nr);
ret = insert_delayed_items_batch(trans, log, path, &batch, first);
curr = list_last_entry(delayed_ins_list, struct btrfs_delayed_item,
@@ -6587,7 +6580,9 @@ static int log_delayed_deletions_full(struct btrfs_trans_handle *trans,
}
last_dir_index = curr->index;
- ASSERT(last_dir_index >= first_dir_index);
+ ASSERT(last_dir_index >= first_dir_index,
+ "last_dir_index=%llu first_dir_index=%llu",
+ last_dir_index, first_dir_index);
ret = insert_dir_log_key(trans, inode->root->log_root, path,
ino, first_dir_index, last_dir_index);
@@ -6681,7 +6676,9 @@ static int log_delayed_deletions_incremental(struct btrfs_trans_handle *trans,
goto next_batch;
last_dir_index = last->index;
- ASSERT(last_dir_index >= first_dir_index);
+ ASSERT(last_dir_index >= first_dir_index,
+ "last_dir_index=%llu first_dir_index=%llu",
+ last_dir_index, first_dir_index);
/*
* If this range starts right after where the previous one ends,
* then we want to reuse the previous range item and change its
@@ -6748,7 +6745,8 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans,
*/
lockdep_assert_not_held(&inode->log_mutex);
- ASSERT(!ctx->logging_new_delayed_dentries);
+ ASSERT(!ctx->logging_new_delayed_dentries,
+ "ctx->logging_new_delayed_dentries=%d", ctx->logging_new_delayed_dentries);
ctx->logging_new_delayed_dentries = true;
list_for_each_entry(item, delayed_ins_list, log_list) {
@@ -7169,8 +7167,8 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->skip_locking = 1;
- path->search_commit_root = 1;
+ path->skip_locking = true;
+ path->search_commit_root = true;
key.objectid = ino;
key.type = BTRFS_INODE_REF_KEY;
@@ -7203,28 +7201,24 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
item_size = btrfs_item_size(leaf, slot);
ptr = btrfs_item_ptr_offset(leaf, slot);
while (cur_offset < item_size) {
- struct btrfs_key inode_key;
+ u64 dir_id;
struct btrfs_inode *dir_inode;
- inode_key.type = BTRFS_INODE_ITEM_KEY;
- inode_key.offset = 0;
-
if (key.type == BTRFS_INODE_EXTREF_KEY) {
struct btrfs_inode_extref *extref;
extref = (struct btrfs_inode_extref *)
(ptr + cur_offset);
- inode_key.objectid = btrfs_inode_extref_parent(
- leaf, extref);
+ dir_id = btrfs_inode_extref_parent(leaf, extref);
cur_offset += sizeof(*extref);
cur_offset += btrfs_inode_extref_name_len(leaf,
extref);
} else {
- inode_key.objectid = key.offset;
+ dir_id = key.offset;
cur_offset = item_size;
}
- dir_inode = btrfs_iget_logging(inode_key.objectid, root);
+ dir_inode = btrfs_iget_logging(dir_id, root);
/*
* If the parent inode was deleted, return an error to
* fallback to a transaction commit. This is to prevent
@@ -7965,7 +7959,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct fscrypt_name fname;
- ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX);
+ ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX,
+ "old_dir_index=%llu", old_dir_index);
ret = fscrypt_setup_filename(&old_dir->vfs_inode,
&old_dentry->d_name, 0, &fname);
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index dc313e6bb2fa..41e47fda036d 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -8,8 +8,7 @@
#include <linux/list.h>
#include <linux/fs.h>
-#include "messages.h"
-#include "ctree.h"
+#include <linux/fscrypt.h>
#include "transaction.h"
struct inode;
@@ -80,13 +79,12 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct dentry *dentry,
struct btrfs_log_ctx *ctx);
void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
const struct fscrypt_str *name,
struct btrfs_inode *dir, u64 index);
void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
const struct fscrypt_str *name,
- struct btrfs_inode *inode, u64 dirid);
+ struct btrfs_inode *inode,
+ struct btrfs_inode *dir);
void btrfs_end_log_trans(struct btrfs_root *root);
void btrfs_pin_log_trans(struct btrfs_root *root);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 17b5e81123a1..e3a1310fa7d5 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -27,32 +27,26 @@ static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, const u8 *uuid,
u8 type, u64 subid)
{
int ret;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *eb;
int slot;
u32 item_size;
unsigned long offset;
struct btrfs_key key;
- if (WARN_ON_ONCE(!uuid_root)) {
- ret = -ENOENT;
- goto out;
- }
+ if (WARN_ON_ONCE(!uuid_root))
+ return -ENOENT;
path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!path)
+ return -ENOMEM;
btrfs_uuid_to_key(uuid, type, &key);
ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0);
- if (ret < 0) {
- goto out;
- } else if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return -ENOENT;
eb = path->nodes[0];
slot = path->slots[0];
@@ -64,7 +58,7 @@ static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, const u8 *uuid,
btrfs_warn(uuid_root->fs_info,
"uuid item with illegal size %lu!",
(unsigned long)item_size);
- goto out;
+ return ret;
}
while (item_size) {
__le64 data;
@@ -78,8 +72,6 @@ static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, const u8 *uuid,
item_size -= sizeof(data);
}
-out:
- btrfs_free_path(path);
return ret;
}
@@ -89,7 +81,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 typ
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *uuid_root = fs_info->uuid_root;
int ret;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct extent_buffer *eb;
int slot;
@@ -100,18 +92,14 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 typ
if (ret != -ENOENT)
return ret;
- if (WARN_ON_ONCE(!uuid_root)) {
- ret = -EINVAL;
- goto out;
- }
+ if (WARN_ON_ONCE(!uuid_root))
+ return -EINVAL;
btrfs_uuid_to_key(uuid, type, &key);
path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!path)
+ return -ENOMEM;
ret = btrfs_insert_empty_item(trans, uuid_root, path, &key,
sizeof(subid_le));
@@ -134,15 +122,12 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 typ
btrfs_warn(fs_info,
"insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!",
ret, key.objectid, key.offset, type);
- goto out;
+ return ret;
}
- ret = 0;
subid_le = cpu_to_le64(subid_cpu);
write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le));
-out:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
@@ -151,7 +136,7 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *uuid_root = fs_info->uuid_root;
int ret;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct extent_buffer *eb;
int slot;
@@ -161,29 +146,23 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8
unsigned long move_src;
unsigned long move_len;
- if (WARN_ON_ONCE(!uuid_root)) {
- ret = -EINVAL;
- goto out;
- }
+ if (WARN_ON_ONCE(!uuid_root))
+ return -EINVAL;
btrfs_uuid_to_key(uuid, type, &key);
path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!path)
+ return -ENOMEM;
ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
if (ret < 0) {
btrfs_warn(fs_info, "error %d while searching for uuid item!",
ret);
- goto out;
- }
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
+ return ret;
}
+ if (ret > 0)
+ return -ENOENT;
eb = path->nodes[0];
slot = path->slots[0];
@@ -192,8 +171,7 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8
if (!IS_ALIGNED(item_size, sizeof(u64))) {
btrfs_warn(fs_info, "uuid item with illegal size %lu!",
(unsigned long)item_size);
- ret = -ENOENT;
- goto out;
+ return -ENOENT;
}
while (item_size) {
__le64 read_subid;
@@ -205,16 +183,12 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8
item_size -= sizeof(read_subid);
}
- if (!item_size) {
- ret = -ENOENT;
- goto out;
- }
+ if (!item_size)
+ return -ENOENT;
item_size = btrfs_item_size(eb, slot);
- if (item_size == sizeof(subid)) {
- ret = btrfs_del_item(trans, uuid_root, path);
- goto out;
- }
+ if (item_size == sizeof(subid))
+ return btrfs_del_item(trans, uuid_root, path);
move_dst = offset;
move_src = offset + sizeof(subid);
@@ -222,9 +196,7 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8
memmove_extent_buffer(eb, move_dst, move_src, move_len);
btrfs_truncate_item(trans, path, item_size - sizeof(subid), 1);
-out:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type,
@@ -293,7 +265,7 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root = fs_info->uuid_root;
struct btrfs_key key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int ret = 0;
struct extent_buffer *leaf;
int slot;
@@ -301,10 +273,8 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info)
unsigned long offset;
path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!path)
+ return -ENOMEM;
key.objectid = 0;
key.type = 0;
@@ -312,17 +282,15 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info)
again_search_slot:
ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION);
- if (ret) {
- if (ret > 0)
- ret = 0;
- goto out;
- }
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return 0;
while (1) {
- if (btrfs_fs_closing(fs_info)) {
- ret = -EINTR;
- goto out;
- }
+ if (btrfs_fs_closing(fs_info))
+ return -EINTR;
+
cond_resched();
leaf = path->nodes[0];
slot = path->slots[0];
@@ -353,7 +321,7 @@ again_search_slot:
ret = btrfs_check_uuid_tree_entry(fs_info, uuid,
key.type, subid_cpu);
if (ret < 0)
- goto out;
+ return ret;
if (ret > 0) {
btrfs_release_path(path);
ret = btrfs_uuid_iter_rem(root, uuid, key.type,
@@ -369,7 +337,7 @@ again_search_slot:
goto again_search_slot;
}
if (ret < 0 && ret != -ENOENT)
- goto out;
+ return ret;
key.offset++;
goto again_search_slot;
}
@@ -386,8 +354,6 @@ skip:
break;
}
-out:
- btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c
index 46bd8ca58670..06dfcb461f53 100644
--- a/fs/btrfs/verity.c
+++ b/fs/btrfs/verity.c
@@ -109,7 +109,7 @@ static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = inode->root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
int count = 0;
int ret;
@@ -121,10 +121,8 @@ static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
while (1) {
/* 1 for the item being dropped */
trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out;
- }
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
/*
* Walk backwards through all the items until we find one that
@@ -143,7 +141,7 @@ static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
path->slots[0]--;
} else if (ret < 0) {
btrfs_end_transaction(trans);
- goto out;
+ return ret;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
@@ -161,17 +159,14 @@ static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
ret = btrfs_del_items(trans, root, path, path->slots[0], 1);
if (ret) {
btrfs_end_transaction(trans);
- goto out;
+ return ret;
}
count++;
btrfs_release_path(path);
btrfs_end_transaction(trans);
}
- ret = count;
btrfs_end_transaction(trans);
-out:
- btrfs_free_path(path);
- return ret;
+ return count;
}
/*
@@ -217,7 +212,7 @@ static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
const char *src, u64 len)
{
struct btrfs_trans_handle *trans;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
struct btrfs_key key;
@@ -233,10 +228,8 @@ static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
while (len > 0) {
/* 1 for the new item being inserted */
trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
- }
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
key.objectid = btrfs_ino(inode);
key.type = key_type;
@@ -267,7 +260,6 @@ static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
btrfs_end_transaction(trans);
}
- btrfs_free_path(path);
return ret;
}
@@ -296,7 +288,7 @@ static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
char *dest, u64 len, struct folio *dest_folio)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
struct btrfs_key key;
@@ -404,7 +396,6 @@ static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
}
}
out:
- btrfs_free_path(path);
if (!ret)
ret = copied;
return ret;
@@ -587,6 +578,9 @@ static int btrfs_begin_enable_verity(struct file *filp)
btrfs_assert_inode_locked(inode);
+ if (IS_ENCRYPTED(&inode->vfs_inode))
+ return -EOPNOTSUPP;
+
if (test_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags))
return -EBUSY;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2bec544d8ba3..ae1742a35e76 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -739,7 +739,7 @@ static bool is_same_device(struct btrfs_device *device, const char *new_path)
{
struct path old = { .mnt = NULL, .dentry = NULL };
struct path new = { .mnt = NULL, .dentry = NULL };
- char *old_path = NULL;
+ char AUTO_KFREE(old_path);
bool is_same = false;
int ret;
@@ -765,7 +765,6 @@ static bool is_same_device(struct btrfs_device *device, const char *new_path)
if (path_equal(&old, &new))
is_same = true;
out:
- kfree(old_path);
path_put(&old);
path_put(&new);
return is_same;
@@ -1681,7 +1680,7 @@ static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
struct btrfs_root *root = fs_info->dev_root;
struct btrfs_key key;
struct btrfs_dev_extent *dev_extent;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
u64 search_start;
u64 hole_size;
u64 max_hole_start;
@@ -1711,8 +1710,8 @@ again:
}
path->reada = READA_FORWARD;
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
key.objectid = device->devid;
key.type = BTRFS_DEV_EXTENT_KEY;
@@ -1812,7 +1811,6 @@ next:
"max_hole_start=%llu max_hole_size=%llu search_end=%llu",
max_hole_start, max_hole_size, search_end);
out:
- btrfs_free_path(path);
*start = max_hole_start;
if (len)
*len = max_hole_size;
@@ -1826,7 +1824,7 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_root *root = fs_info->dev_root;
int ret;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key found_key;
struct extent_buffer *leaf = NULL;
@@ -1845,7 +1843,7 @@ again:
ret = btrfs_previous_item(root, path, key.objectid,
BTRFS_DEV_EXTENT_KEY);
if (ret)
- goto out;
+ return ret;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
extent = btrfs_item_ptr(leaf, path->slots[0],
@@ -1860,7 +1858,7 @@ again:
extent = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_dev_extent);
} else {
- goto out;
+ return ret;
}
*dev_extent_len = btrfs_dev_extent_length(leaf, extent);
@@ -1868,8 +1866,6 @@ again:
ret = btrfs_del_item(trans, root, path);
if (ret == 0)
set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
-out:
- btrfs_free_path(path);
return ret;
}
@@ -1897,7 +1893,7 @@ static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
int ret;
struct btrfs_key key;
struct btrfs_key found_key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
path = btrfs_alloc_path();
if (!path)
@@ -1909,13 +1905,12 @@ static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
if (ret < 0)
- goto error;
+ return ret;
if (unlikely(ret == 0)) {
/* Corruption */
btrfs_err(fs_info, "corrupted chunk tree devid -1 matched");
- ret = -EUCLEAN;
- goto error;
+ return -EUCLEAN;
}
ret = btrfs_previous_item(fs_info->chunk_root, path,
@@ -1928,10 +1923,7 @@ static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
path->slots[0]);
*devid_ret = found_key.offset + 1;
}
- ret = 0;
-error:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
/*
@@ -1942,7 +1934,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
struct btrfs_device *device)
{
int ret;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_dev_item *dev_item;
struct extent_buffer *leaf;
struct btrfs_key key;
@@ -1961,7 +1953,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
&key, sizeof(*dev_item));
btrfs_trans_release_chunk_metadata(trans);
if (ret)
- goto out;
+ return ret;
leaf = path->nodes[0];
dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
@@ -1987,10 +1979,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
ptr, BTRFS_FSID_SIZE);
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
/*
@@ -2002,14 +1991,11 @@ out:
static void update_dev_time(const char *device_path)
{
struct path path;
- int ret;
- ret = kern_path(device_path, LOOKUP_FOLLOW, &path);
- if (ret)
- return;
-
- inode_update_time(d_inode(path.dentry), S_MTIME | S_CTIME | S_VERSION);
- path_put(&path);
+ if (!kern_path(device_path, LOOKUP_FOLLOW, &path)) {
+ vfs_utimes(&path, NULL);
+ path_put(&path);
+ }
}
static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
@@ -2017,7 +2003,7 @@ static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = device->fs_info->chunk_root;
int ret;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
path = btrfs_alloc_path();
@@ -2031,16 +2017,12 @@ static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
btrfs_reserve_chunk_metadata(trans, false);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
btrfs_trans_release_chunk_metadata(trans);
- if (ret) {
- if (ret > 0)
- ret = -ENOENT;
- goto out;
- }
+ if (ret > 0)
+ return -ENOENT;
+ if (ret < 0)
+ return ret;
- ret = btrfs_del_item(trans, root, path);
-out:
- btrfs_free_path(path);
- return ret;
+ return btrfs_del_item(trans, root, path);
}
/*
@@ -2626,7 +2608,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
BTRFS_DEV_LOOKUP_ARGS(args);
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->chunk_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_dev_item *dev_item;
struct btrfs_device *device;
@@ -2648,7 +2630,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
btrfs_trans_release_chunk_metadata(trans);
if (ret < 0)
- goto error;
+ return ret;
leaf = path->nodes[0];
next_slot:
@@ -2657,7 +2639,7 @@ next_slot:
if (ret > 0)
break;
if (ret < 0)
- goto error;
+ return ret;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
btrfs_release_path(path);
@@ -2688,10 +2670,7 @@ next_slot:
path->slots[0]++;
goto next_slot;
}
- ret = 0;
-error:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
@@ -2946,7 +2925,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device)
{
int ret;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root *root = device->fs_info->chunk_root;
struct btrfs_dev_item *dev_item;
struct extent_buffer *leaf;
@@ -2962,12 +2941,10 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0)
- goto out;
+ return ret;
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
+ if (ret > 0)
+ return -ENOENT;
leaf = path->nodes[0];
dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
@@ -2981,8 +2958,6 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
btrfs_device_get_disk_total_bytes(device));
btrfs_set_device_bytes_used(leaf, dev_item,
btrfs_device_get_bytes_used(device));
-out:
- btrfs_free_path(path);
return ret;
}
@@ -3035,7 +3010,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->chunk_root;
int ret;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
path = btrfs_alloc_path();
@@ -3048,23 +3023,21 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
- goto out;
- else if (unlikely(ret > 0)) { /* Logic error or corruption */
+ return ret;
+ if (unlikely(ret > 0)) {
+ /* Logic error or corruption */
btrfs_err(fs_info, "failed to lookup chunk %llu when freeing",
chunk_offset);
btrfs_abort_transaction(trans, -ENOENT);
- ret = -EUCLEAN;
- goto out;
+ return -EUCLEAN;
}
ret = btrfs_del_item(trans, root, path);
if (unlikely(ret < 0)) {
btrfs_err(fs_info, "failed to delete chunk %llu item", chunk_offset);
btrfs_abort_transaction(trans, ret);
- goto out;
+ return ret;
}
-out:
- btrfs_free_path(path);
return ret;
}
@@ -3501,7 +3474,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset,
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *chunk_root = fs_info->chunk_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_chunk *chunk;
struct btrfs_key key;
@@ -3525,7 +3498,7 @@ again:
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
if (ret < 0) {
mutex_unlock(&fs_info->reclaim_bgs_lock);
- goto error;
+ return ret;
}
if (unlikely(ret == 0)) {
/*
@@ -3535,9 +3508,8 @@ again:
* offset (one less than the previous one, wrong
* alignment and size).
*/
- ret = -EUCLEAN;
mutex_unlock(&fs_info->reclaim_bgs_lock);
- goto error;
+ return -EUCLEAN;
}
ret = btrfs_previous_item(chunk_root, path, key.objectid,
@@ -3545,7 +3517,7 @@ again:
if (ret)
mutex_unlock(&fs_info->reclaim_bgs_lock);
if (ret < 0)
- goto error;
+ return ret;
if (ret > 0)
break;
@@ -3579,8 +3551,6 @@ again:
} else if (WARN_ON(failed && retried)) {
ret = -ENOSPC;
}
-error:
- btrfs_free_path(path);
return ret;
}
@@ -4081,7 +4051,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
struct btrfs_root *chunk_root = fs_info->chunk_root;
u64 chunk_type;
struct btrfs_chunk *chunk;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key found_key;
struct extent_buffer *leaf;
@@ -4252,7 +4222,6 @@ loop:
goto again;
}
error:
- btrfs_free_path(path);
if (enospc_errors) {
btrfs_info(fs_info, "%d enospc errors during balance",
enospc_errors);
@@ -4410,7 +4379,7 @@ static void describe_balance_start_or_resume(struct btrfs_fs_info *fs_info)
{
u32 size_buf = 1024;
char tmp_buf[192] = {'\0'};
- char *buf;
+ char AUTO_KFREE(buf);
char *bp;
u32 size_bp = size_buf;
int ret;
@@ -4458,8 +4427,6 @@ out_overflow:
btrfs_info(fs_info, "balance: %s %s",
(bctl->flags & BTRFS_BALANCE_RESUME) ?
"resume" : "start", buf);
-
- kfree(buf);
}
/*
@@ -4660,12 +4627,12 @@ static int balance_kthread(void *data)
struct btrfs_fs_info *fs_info = data;
int ret = 0;
- sb_start_write(fs_info->sb);
+ guard(super_write)(fs_info->sb);
+
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl)
ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
mutex_unlock(&fs_info->balance_mutex);
- sb_end_write(fs_info->sb);
return ret;
}
@@ -4709,7 +4676,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
struct btrfs_balance_control *bctl;
struct btrfs_balance_item *item;
struct btrfs_disk_balance_args disk_bargs;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_key key;
int ret;
@@ -4724,17 +4691,14 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0)
- goto out;
+ return ret;
if (ret > 0) { /* ret = -ENOENT; */
- ret = 0;
- goto out;
+ return 0;
}
bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
- if (!bctl) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!bctl)
+ return -ENOMEM;
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
@@ -4771,8 +4735,6 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
fs_info->balance_ctl = bctl;
spin_unlock(&fs_info->balance_lock);
mutex_unlock(&fs_info->balance_mutex);
-out:
- btrfs_free_path(path);
return ret;
}
@@ -5593,9 +5555,8 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_fs_devices *fs_devices = info->fs_devices;
- struct btrfs_device_info *devices_info = NULL;
+ struct btrfs_device_info AUTO_KFREE(devices_info);
struct alloc_chunk_ctl ctl;
- struct btrfs_block_group *block_group;
int ret;
lockdep_assert_held(&info->chunk_mutex);
@@ -5628,22 +5589,14 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
return ERR_PTR(-ENOMEM);
ret = gather_device_info(fs_devices, &ctl, devices_info);
- if (ret < 0) {
- block_group = ERR_PTR(ret);
- goto out;
- }
+ if (ret < 0)
+ return ERR_PTR(ret);
ret = decide_stripe_size(fs_devices, &ctl, devices_info);
- if (ret < 0) {
- block_group = ERR_PTR(ret);
- goto out;
- }
-
- block_group = create_chunk(trans, &ctl, devices_info);
+ if (ret < 0)
+ return ERR_PTR(ret);
-out:
- kfree(devices_info);
- return block_group;
+ return create_chunk(trans, &ctl, devices_info);
}
/*
@@ -6076,12 +6029,7 @@ struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
{
struct btrfs_io_context *bioc;
- bioc = kzalloc(
- /* The size of btrfs_io_context */
- sizeof(struct btrfs_io_context) +
- /* Plus the variable array for the stripes */
- sizeof(struct btrfs_io_stripe) * (total_stripes),
- GFP_NOFS);
+ bioc = kzalloc(struct_size(bioc, stripes, total_stripes), GFP_NOFS);
if (!bioc)
return NULL;
@@ -6807,6 +6755,8 @@ static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args,
const struct btrfs_device *device)
{
+ if (args->devt)
+ return device->devt == args->devt;
if (args->missing) {
if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) &&
!device->bdev)
@@ -7455,7 +7405,7 @@ static void readahead_tree_node_children(struct extent_buffer *node)
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root = fs_info->chunk_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_key key;
struct btrfs_key found_key;
@@ -7494,7 +7444,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
* chunk tree, to keep it simple, just skip locking on the chunk tree.
*/
ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
- path->skip_locking = 1;
+ path->skip_locking = true;
/*
* Read all device items, and then all the chunk items. All
@@ -7572,8 +7522,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
ret = 0;
error:
mutex_unlock(&uuid_mutex);
-
- btrfs_free_path(path);
return ret;
}
@@ -7673,7 +7621,7 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
struct btrfs_device *device;
- struct btrfs_path *path = NULL;
+ BTRFS_PATH_AUTO_FREE(path);
int ret = 0;
path = btrfs_alloc_path();
@@ -7695,8 +7643,6 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
}
out:
mutex_unlock(&fs_devices->device_list_mutex);
-
- btrfs_free_path(path);
return ret;
}
@@ -7705,7 +7651,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *dev_root = fs_info->dev_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct extent_buffer *eb;
struct btrfs_dev_stats_item *ptr;
@@ -7724,7 +7670,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
btrfs_warn(fs_info,
"error %d while searching for dev_stats item for device %s",
ret, btrfs_dev_name(device));
- goto out;
+ return ret;
}
if (ret == 0 &&
@@ -7735,7 +7681,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
btrfs_warn(fs_info,
"delete too small dev_stats item for device %s failed %d",
btrfs_dev_name(device), ret);
- goto out;
+ return ret;
}
ret = 1;
}
@@ -7749,7 +7695,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
btrfs_warn(fs_info,
"insert dev_stats item for device %s failed %d",
btrfs_dev_name(device), ret);
- goto out;
+ return ret;
}
}
@@ -7758,8 +7704,6 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
btrfs_set_dev_stats_value(eb, ptr, i,
btrfs_dev_stat_read(device, i));
-out:
- btrfs_free_path(path);
return ret;
}
@@ -8049,7 +7993,7 @@ out:
*/
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root *root = fs_info->dev_root;
struct btrfs_key key;
u64 prev_devid = 0;
@@ -8080,17 +8024,15 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
path->reada = READA_FORWARD;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
- goto out;
+ return ret;
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out;
+ return ret;
/* No dev extents at all? Not good */
- if (unlikely(ret > 0)) {
- ret = -EUCLEAN;
- goto out;
- }
+ if (unlikely(ret > 0))
+ return -EUCLEAN;
}
while (1) {
struct extent_buffer *leaf = path->nodes[0];
@@ -8116,20 +8058,19 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
devid, physical_offset, prev_dev_ext_end);
- ret = -EUCLEAN;
- goto out;
+ return -EUCLEAN;
}
ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
physical_offset, physical_len);
if (ret < 0)
- goto out;
+ return ret;
prev_devid = devid;
prev_dev_ext_end = physical_offset + physical_len;
ret = btrfs_next_item(root, path);
if (ret < 0)
- goto out;
+ return ret;
if (ret > 0) {
ret = 0;
break;
@@ -8137,10 +8078,7 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
}
/* Ensure all chunks have corresponding dev extents */
- ret = verify_chunk_dev_extent_mapping(fs_info);
-out:
- btrfs_free_path(path);
- return ret;
+ return verify_chunk_dev_extent_mapping(fs_info);
}
/*
@@ -8177,12 +8115,12 @@ static int relocating_repair_kthread(void *data)
target = cache->start;
btrfs_put_block_group(cache);
- sb_start_write(fs_info->sb);
+ guard(super_write)(fs_info->sb);
+
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
btrfs_info(fs_info,
"zoned: skip relocating block group %llu to repair: EBUSY",
target);
- sb_end_write(fs_info->sb);
return -EBUSY;
}
@@ -8210,7 +8148,6 @@ out:
btrfs_put_block_group(cache);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_exclop_finish(fs_info);
- sb_end_write(fs_info->sb);
return ret;
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2cbf8080eade..34b854c1a303 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -45,7 +45,7 @@ extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN_SHIFT (16)
#define BTRFS_STRIPE_LEN_MASK (BTRFS_STRIPE_LEN - 1)
-static_assert(const_ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
+static_assert(ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
/* Used by sanity check for btrfs_raid_types. */
#define const_ffs(n) (__builtin_ctzll(n) + 1)
@@ -58,8 +58,7 @@ static_assert(const_ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
*/
static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
-static_assert(const_ilog2(BTRFS_BLOCK_GROUP_RAID0) >
- ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));
+static_assert(ilog2(BTRFS_BLOCK_GROUP_RAID0) > ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));
/* ilog2() can handle both constants and variables */
#define BTRFS_BG_FLAG_TO_INDEX(profile) \
@@ -662,6 +661,11 @@ struct btrfs_dev_lookup_args {
u64 devid;
u8 *uuid;
u8 *fsid;
+ /*
+ * If devt is specified, all other members will be ignored as it is
+ * enough to uniquely locate a device.
+ */
+ dev_t devt;
bool missing;
};
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 79fb1614bd0c..ab55d10bd71f 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -29,9 +29,8 @@ int btrfs_getxattr(const struct inode *inode, const char *name,
{
struct btrfs_dir_item *di;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
- int ret = 0;
unsigned long data_ptr;
path = btrfs_alloc_path();
@@ -41,26 +40,19 @@ int btrfs_getxattr(const struct inode *inode, const char *name,
/* lookup the xattr by name */
di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(BTRFS_I(inode)),
name, strlen(name), 0);
- if (!di) {
- ret = -ENODATA;
- goto out;
- } else if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
- }
+ if (!di)
+ return -ENODATA;
+ if (IS_ERR(di))
+ return PTR_ERR(di);
leaf = path->nodes[0];
/* if size is 0, that means we want the size of the attr */
- if (!size) {
- ret = btrfs_dir_data_len(leaf, di);
- goto out;
- }
+ if (!size)
+ return btrfs_dir_data_len(leaf, di);
/* now get the data out of our dir_item */
- if (btrfs_dir_data_len(leaf, di) > size) {
- ret = -ERANGE;
- goto out;
- }
+ if (btrfs_dir_data_len(leaf, di) > size)
+ return -ERANGE;
/*
* The way things are packed into the leaf is like this
@@ -73,11 +65,7 @@ int btrfs_getxattr(const struct inode *inode, const char *name,
btrfs_dir_name_len(leaf, di));
read_extent_buffer(leaf, buffer, data_ptr,
btrfs_dir_data_len(leaf, di));
- ret = btrfs_dir_data_len(leaf, di);
-
-out:
- btrfs_free_path(path);
- return ret;
+ return btrfs_dir_data_len(leaf, di);
}
int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
@@ -85,7 +73,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
{
struct btrfs_dir_item *di = NULL;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
size_t name_len = strlen(name);
int ret = 0;
@@ -97,7 +85,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->skip_release_on_error = 1;
+ path->skip_release_on_error = true;
if (!value) {
di = btrfs_lookup_xattr(trans, root, path,
@@ -212,7 +200,6 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
*/
}
out:
- btrfs_free_path(path);
if (!ret) {
set_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags);
@@ -278,7 +265,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct btrfs_key key;
struct inode *inode = d_inode(dentry);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int iter_ret = 0;
int ret = 0;
size_t total_size = 0, size_left = size;
@@ -354,8 +341,6 @@ next:
else
ret = total_size;
- btrfs_free_path(path);
-
return ret;
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index d1db7fa1fe58..359a98e6de85 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -37,8 +37,8 @@
#define BTRFS_SB_LOG_FIRST_OFFSET (512ULL * SZ_1G)
#define BTRFS_SB_LOG_SECOND_OFFSET (4096ULL * SZ_1G)
-#define BTRFS_SB_LOG_FIRST_SHIFT const_ilog2(BTRFS_SB_LOG_FIRST_OFFSET)
-#define BTRFS_SB_LOG_SECOND_SHIFT const_ilog2(BTRFS_SB_LOG_SECOND_OFFSET)
+#define BTRFS_SB_LOG_FIRST_SHIFT ilog2(BTRFS_SB_LOG_FIRST_OFFSET)
+#define BTRFS_SB_LOG_SECOND_SHIFT ilog2(BTRFS_SB_LOG_SECOND_OFFSET)
/* Number of superblock log zones */
#define BTRFS_NR_SB_LOG_ZONES 2
@@ -93,7 +93,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
sector_t sector;
for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
- ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL);
+ ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL,
+ "zones[%d].type=%d", i, zones[i].type);
empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY);
full[i] = sb_zone_is_full(&zones[i]);
}
@@ -166,14 +167,14 @@ static inline u32 sb_zone_number(int shift, int mirror)
{
u64 zone = U64_MAX;
- ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX);
+ ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX, "mirror=%d", mirror);
switch (mirror) {
case 0: zone = 0; break;
case 1: zone = 1ULL << (BTRFS_SB_LOG_FIRST_SHIFT - shift); break;
case 2: zone = 1ULL << (BTRFS_SB_LOG_SECOND_SHIFT - shift); break;
}
- ASSERT(zone <= U32_MAX);
+ ASSERT(zone <= U32_MAX, "zone=%llu", zone);
return (u32)zone;
}
@@ -240,7 +241,8 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
unsigned int i;
u32 zno;
- ASSERT(IS_ALIGNED(pos, zinfo->zone_size));
+ ASSERT(IS_ALIGNED(pos, zinfo->zone_size),
+ "pos=%llu zinfo->zone_size=%llu", pos, zinfo->zone_size);
zno = pos >> zinfo->zone_size_shift;
/*
* We cannot report zones beyond the zone end. So, it is OK to
@@ -264,8 +266,8 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
}
}
- ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones,
- copy_zone_info_cb, zones);
+ ret = blkdev_report_zones_cached(device->bdev, pos >> SECTOR_SHIFT,
+ *nr_zones, copy_zone_info_cb, zones);
if (ret < 0) {
btrfs_err(device->fs_info,
"zoned: failed to read zone %llu on %s (devid %llu)",
@@ -494,6 +496,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
case BLK_ZONE_COND_CLOSED:
+ case BLK_ZONE_COND_ACTIVE:
__set_bit(nreported, zone_info->active_zones);
nactive++;
break;
@@ -896,9 +899,9 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
if (sb_zone + 1 >= nr_zones)
return -ENOENT;
- ret = blkdev_report_zones(bdev, zone_start_sector(sb_zone, bdev),
- BTRFS_NR_SB_LOG_ZONES, copy_zone_info_cb,
- zones);
+ ret = blkdev_report_zones_cached(bdev, zone_start_sector(sb_zone, bdev),
+ BTRFS_NR_SB_LOG_ZONES,
+ copy_zone_info_cb, zones);
if (ret < 0)
return ret;
if (unlikely(ret != BTRFS_NR_SB_LOG_ZONES))
@@ -1055,8 +1058,10 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
bool have_sb;
int i;
- ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size));
- ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size));
+ ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size),
+ "hole_start=%llu zinfo->zone_size=%llu", hole_start, zinfo->zone_size);
+ ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size),
+ "num_bytes=%llu zinfo->zone_size=%llu", num_bytes, zinfo->zone_size);
while (pos < hole_end) {
begin = pos >> shift;
@@ -1172,8 +1177,10 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
u64 pos;
int ret;
- ASSERT(IS_ALIGNED(start, zinfo->zone_size));
- ASSERT(IS_ALIGNED(size, zinfo->zone_size));
+ ASSERT(IS_ALIGNED(start, zinfo->zone_size),
+ "start=%llu, zinfo->zone_size=%llu", start, zinfo->zone_size);
+ ASSERT(IS_ALIGNED(size, zinfo->zone_size),
+ "size=%llu, zinfo->zone_size=%llu", size, zinfo->zone_size);
if (begin + nbits > zinfo->nr_zones)
return -ERANGE;
@@ -1628,7 +1635,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
struct btrfs_chunk_map *map;
u64 logical = cache->start;
u64 length = cache->length;
- struct zone_info *zone_info = NULL;
+ struct zone_info AUTO_KFREE(zone_info);
int ret;
int i;
unsigned long *active = NULL;
@@ -1782,7 +1789,6 @@ out:
cache->physical_map = NULL;
}
bitmap_free(active);
- kfree(zone_info);
return ret;
}
@@ -1809,14 +1815,14 @@ bool btrfs_use_zone_append(struct btrfs_bio *bbio)
{
u64 start = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT);
struct btrfs_inode *inode = bbio->inode;
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_block_group *cache;
bool ret = false;
if (!btrfs_is_zoned(fs_info))
return false;
- if (!inode || !is_data_inode(inode))
+ if (!is_data_inode(inode))
return false;
if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE)
@@ -1867,7 +1873,7 @@ static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered,
em = btrfs_search_extent_mapping(em_tree, ordered->file_offset,
ordered->num_bytes);
/* The em should be a new COW extent, thus it should not have an offset. */
- ASSERT(em->offset == 0);
+ ASSERT(em->offset == 0, "em->offset=%llu", em->offset);
em->disk_bytenr = logical;
btrfs_free_extent_map(em);
write_unlock(&em_tree->lock);
@@ -2578,7 +2584,8 @@ again:
struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0];
int factor;
- ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+ ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC,
+ "reloc_sinfo->subgroup_id=%d", reloc_sinfo->subgroup_id);
factor = btrfs_bg_type_to_factor(bg->flags);
down_write(&space_info->groups_sem);
@@ -2592,9 +2599,9 @@ again:
space_info->disk_total -= bg->length * factor;
space_info->disk_total -= bg->zone_unusable;
/* There is no allocation ever happened. */
- ASSERT(bg->used == 0);
+ ASSERT(bg->used == 0, "bg->used=%llu", bg->used);
/* No super block in a block group on the zoned setup. */
- ASSERT(bg->bytes_super == 0);
+ ASSERT(bg->bytes_super == 0, "bg->bytes_super=%llu", bg->bytes_super);
spin_unlock(&space_info->lock);
bg->space_info = reloc_sinfo;
@@ -2620,7 +2627,8 @@ again:
/* Allocate new BG in the data relocation space_info. */
space_info = data_sinfo->sub_group[0];
- ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+ ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC,
+ "space_info->subgroup_id=%d", space_info->subgroup_id);
ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
btrfs_end_transaction(trans);
if (ret == 1) {
@@ -2750,10 +2758,9 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
return ret < 0 ? ret : 1;
}
-int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- bool do_finish)
+int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_finish)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct btrfs_block_group *bg;
int index;
@@ -2962,7 +2969,8 @@ int btrfs_reset_unused_block_groups(struct btrfs_space_info *space_info, u64 num
* This holds because we currently reset fully used then freed
* block group.
*/
- ASSERT(reclaimed == bg->zone_capacity);
+ ASSERT(reclaimed == bg->zone_capacity,
+ "reclaimed=%llu bg->zone_capacity=%llu", reclaimed, bg->zone_capacity);
bg->free_space_ctl->free_space += reclaimed;
space_info->bytes_zone_unusable -= reclaimed;
spin_unlock(&bg->lock);
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 17c5656580dd..5cefdeb08b7b 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -15,7 +15,6 @@
#include "disk-io.h"
#include "block-group.h"
#include "btrfs_inode.h"
-#include "fs.h"
struct block_device;
struct extent_buffer;
@@ -94,8 +93,7 @@ bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info);
void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
u64 length);
int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
-int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, bool do_finish);
+int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_finish);
void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
int btrfs_reset_unused_block_groups(struct btrfs_space_info *space_info, u64 num_bytes);
#else /* CONFIG_BLK_DEV_ZONED */
@@ -262,8 +260,7 @@ static inline int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
return 1;
}
-static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+static inline int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info,
bool do_finish)
{
/* Consider all the block groups are active */