summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/block-group.c10
-rw-r--r--fs/btrfs/compression.h4
-rw-r--r--fs/btrfs/defrag.c14
-rw-r--r--fs/btrfs/extent_io.c29
-rw-r--r--fs/btrfs/file.c19
-rw-r--r--fs/btrfs/inode.c89
-rw-r--r--fs/btrfs/ioctl.c41
-rw-r--r--fs/btrfs/misc.h5
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/qgroup.c4
-rw-r--r--fs/btrfs/scrub.c2
-rw-r--r--fs/btrfs/subpage.c5
-rw-r--r--fs/btrfs/tree-log.c5
-rw-r--r--fs/btrfs/volumes.c20
-rw-r--r--fs/btrfs/zoned.c60
15 files changed, 147 insertions, 162 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 5322ef2ae015..08cdda47509f 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1850,12 +1850,10 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
if (!btrfs_should_reclaim(fs_info))
return;
- sb_start_write(fs_info->sb);
+ guard(super_write)(fs_info->sb);
- if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
- sb_end_write(fs_info->sb);
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
return;
- }
/*
* Long running balances can keep us blocked here for eternity, so
@@ -1863,7 +1861,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
*/
if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
btrfs_exclop_finish(fs_info);
- sb_end_write(fs_info->sb);
return;
}
@@ -1947,7 +1944,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
/*
* Get out fast, in case we're read-only or unmounting the
* filesystem. It is OK to drop block groups from the list even
- * for the read-only case. As we did sb_start_write(),
+ * for the read-only case. As we did take the super write lock,
* "mount -o remount,ro" won't happen and read-only filesystem
* means it is forced read-only due to a fatal error. So, it
* never gets back to read-write to let us reclaim again.
@@ -2030,7 +2027,6 @@ end:
list_splice_tail(&retry_list, &fs_info->reclaim_bgs);
spin_unlock(&fs_info->unused_bgs_lock);
btrfs_exclop_finish(fs_info);
- sb_end_write(fs_info->sb);
}
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index eba188a9e3bb..aee1fd21cdd6 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -85,8 +85,8 @@ static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u6
{
/* @cur must be inside the folio. */
ASSERT(folio_pos(folio) <= cur);
- ASSERT(cur < folio_end(folio));
- return min(range_end, folio_end(folio)) - cur;
+ ASSERT(cur < folio_next_pos(folio));
+ return umin(range_end, folio_next_pos(folio)) - cur;
}
int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 7b277934f66f..a7f20f048398 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -254,10 +254,9 @@ again:
range.extent_thresh = defrag->extent_thresh;
file_ra_state_init(ra, inode->vfs_inode.i_mapping);
- sb_start_write(fs_info->sb);
- ret = btrfs_defrag_file(inode, ra, &range, defrag->transid,
- BTRFS_DEFRAG_BATCH);
- sb_end_write(fs_info->sb);
+ scoped_guard(super_write, fs_info->sb)
+ ret = btrfs_defrag_file(inode, ra, &range,
+ defrag->transid, BTRFS_DEFRAG_BATCH);
iput(&inode->vfs_inode);
if (ret < 0)
@@ -886,7 +885,7 @@ again:
}
lock_start = folio_pos(folio);
- lock_end = folio_end(folio) - 1;
+ lock_end = folio_next_pos(folio) - 1;
/* Wait for any existing ordered extent in the range */
while (1) {
struct btrfs_ordered_extent *ordered;
@@ -1178,7 +1177,8 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
if (!folio)
break;
- if (start >= folio_end(folio) || start + len <= folio_pos(folio))
+ if (start >= folio_next_pos(folio) ||
+ start + len <= folio_pos(folio))
continue;
btrfs_folio_clamp_clear_checked(fs_info, folio, start, len);
btrfs_folio_clamp_set_dirty(fs_info, folio, start, len);
@@ -1219,7 +1219,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
folios[i] = NULL;
goto free_folios;
}
- cur = folio_end(folios[i]);
+ cur = folio_next_pos(folios[i]);
}
for (int i = 0; i < nr_pages; i++) {
if (!folios[i])
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 755ec6dfd51c..7361d5d890d2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -333,7 +333,7 @@ static noinline int lock_delalloc_folios(struct inode *inode,
goto out;
}
range_start = max_t(u64, folio_pos(folio), start);
- range_len = min_t(u64, folio_end(folio), end + 1) - range_start;
+ range_len = min_t(u64, folio_next_pos(folio), end + 1) - range_start;
btrfs_folio_set_lock(fs_info, folio, range_start, range_len);
processed_end = range_start + range_len - 1;
@@ -387,7 +387,7 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
ASSERT(orig_end > orig_start);
/* The range should at least cover part of the folio */
- ASSERT(!(orig_start >= folio_end(locked_folio) ||
+ ASSERT(!(orig_start >= folio_next_pos(locked_folio) ||
orig_end <= folio_pos(locked_folio)));
again:
/* step one, find a bunch of delalloc bytes starting at start */
@@ -493,7 +493,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le
struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
ASSERT(folio_pos(folio) <= start &&
- start + len <= folio_end(folio));
+ start + len <= folio_next_pos(folio));
if (uptodate && btrfs_verify_folio(folio, start, len))
btrfs_folio_set_uptodate(fs_info, folio, start, len);
@@ -1201,7 +1201,7 @@ static bool can_skip_one_ordered_range(struct btrfs_inode *inode,
* finished our folio read and unlocked the folio.
*/
if (btrfs_folio_test_dirty(fs_info, folio, cur, blocksize)) {
- u64 range_len = min(folio_end(folio),
+ u64 range_len = umin(folio_next_pos(folio),
ordered->file_offset + ordered->num_bytes) - cur;
ret = true;
@@ -1223,7 +1223,7 @@ static bool can_skip_one_ordered_range(struct btrfs_inode *inode,
* So we return true and update @next_ret to the OE/folio boundary.
*/
if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) {
- u64 range_len = min(folio_end(folio),
+ u64 range_len = umin(folio_next_pos(folio),
ordered->file_offset + ordered->num_bytes) - cur;
/*
@@ -2215,7 +2215,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
for (int i = 0; i < num_extent_folios(eb); i++) {
struct folio *folio = eb->folios[i];
u64 range_start = max_t(u64, eb->start, folio_pos(folio));
- u32 range_len = min_t(u64, folio_end(folio),
+ u32 range_len = min_t(u64, folio_next_pos(folio),
eb->start + eb->len) - range_start;
folio_lock(folio);
@@ -2228,6 +2228,14 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
wbc_account_cgroup_owner(wbc, folio, range_len);
folio_unlock(folio);
}
+ /*
+ * If the fs is already in error status, do not submit any writeback
+ * but immediately finish it.
+ */
+ if (unlikely(BTRFS_FS_ERROR(fs_info))) {
+ btrfs_bio_end_io(bbio, errno_to_blk_status(BTRFS_FS_ERROR(fs_info)));
+ return;
+ }
btrfs_submit_bbio(bbio, 0);
}
@@ -2460,10 +2468,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
&BTRFS_I(inode)->runtime_flags))
wbc->tagged_writepages = 1;
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag = PAGECACHE_TAG_TOWRITE;
- else
- tag = PAGECACHE_TAG_DIRTY;
+ tag = wbc_to_tag(wbc);
retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
@@ -2619,7 +2624,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
continue;
}
- cur_end = min_t(u64, folio_end(folio) - 1, end);
+ cur_end = min_t(u64, folio_next_pos(folio) - 1, end);
cur_len = cur_end + 1 - cur;
ASSERT(folio_test_locked(folio));
@@ -3860,7 +3865,7 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
for (int i = 0; i < num_extent_folios(eb); i++) {
struct folio *folio = eb->folios[i];
u64 range_start = max_t(u64, eb->start, folio_pos(folio));
- u32 range_len = min_t(u64, folio_end(folio),
+ u32 range_len = min_t(u64, folio_next_pos(folio),
eb->start + eb->len) - range_start;
bio_add_folio_nofail(&bbio->bio, folio, range_len,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7efd1f8a1912..e7453f992e1e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -89,7 +89,8 @@ int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos
num_bytes = round_up(write_bytes + pos - start_pos,
fs_info->sectorsize);
ASSERT(num_bytes <= U32_MAX);
- ASSERT(folio_pos(folio) <= pos && folio_end(folio) >= pos + write_bytes);
+ ASSERT(folio_pos(folio) <= pos &&
+ folio_next_pos(folio) >= pos + write_bytes);
end_of_last_block = start_pos + num_bytes - 1;
@@ -799,7 +800,7 @@ static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64
u64 len)
{
u64 clamp_start = max_t(u64, pos, folio_pos(folio));
- u64 clamp_end = min_t(u64, pos + len, folio_end(folio));
+ u64 clamp_end = min_t(u64, pos + len, folio_next_pos(folio));
const u32 blocksize = inode_to_fs_info(inode)->sectorsize;
int ret = 0;
@@ -1254,8 +1255,8 @@ again:
* The reserved range goes beyond the current folio, shrink the reserved
* space to the folio boundary.
*/
- if (reserved_start + reserved_len > folio_end(folio)) {
- const u64 last_block = folio_end(folio);
+ if (reserved_start + reserved_len > folio_next_pos(folio)) {
+ const u64 last_block = folio_next_pos(folio);
shrink_reserved_space(inode, *data_reserved, reserved_start,
reserved_len, last_block - reserved_start,
@@ -2854,12 +2855,22 @@ static int btrfs_fallocate_update_isize(struct inode *inode,
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 range_start;
+ u64 range_end;
int ret;
int ret2;
if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode))
return 0;
+ range_start = round_down(i_size_read(inode), root->fs_info->sectorsize);
+ range_end = round_up(end, root->fs_info->sectorsize);
+
+ ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), range_start,
+ range_end - range_start);
+ if (ret)
+ return ret;
+
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3b1b3a0553ee..9c6ca87b3d56 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9,6 +9,7 @@
#include <linux/blk-cgroup.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/fs_struct.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
@@ -177,8 +178,10 @@ static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
return ret;
}
ret = paths_from_inode(inum, ipath);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_put_root(local_root);
goto err;
+ }
/*
* We deliberately ignore the bit ipath might have been too small to
@@ -409,7 +412,7 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
continue;
}
- index = folio_end(folio) >> PAGE_SHIFT;
+ index = folio_next_index(folio);
/*
* Here we just clear all Ordered bits for every page in the
* range, then btrfs_mark_ordered_io_finished() will handle
@@ -2336,7 +2339,8 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol
* The range must cover part of the @locked_folio, or a return of 1
* can confuse the caller.
*/
- ASSERT(!(end <= folio_pos(locked_folio) || start >= folio_end(locked_folio)));
+ ASSERT(!(end <= folio_pos(locked_folio) ||
+ start >= folio_next_pos(locked_folio)));
if (should_nocow(inode, start, end)) {
ret = run_delalloc_nocow(inode, locked_folio, start, end);
@@ -2743,7 +2747,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
struct btrfs_inode *inode = fixup->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 page_start = folio_pos(folio);
- u64 page_end = folio_end(folio) - 1;
+ u64 page_end = folio_next_pos(folio) - 1;
int ret = 0;
bool free_delalloc_space = true;
@@ -3884,7 +3888,7 @@ static int btrfs_add_inode_to_root(struct btrfs_inode *inode, bool prealloc)
ASSERT(ret != -ENOMEM);
return ret;
} else if (existing) {
- WARN_ON(!(existing->vfs_inode.i_state & (I_WILL_FREE | I_FREEING)));
+ WARN_ON(!(inode_state_read_once(&existing->vfs_inode) & (I_WILL_FREE | I_FREEING)));
}
return 0;
@@ -4855,7 +4859,7 @@ again:
*/
zero_start = max_t(u64, folio_pos(folio), start);
- zero_end = folio_end(folio);
+ zero_end = folio_next_pos(folio);
folio_zero_range(folio, zero_start - folio_pos(folio),
zero_end - zero_start);
@@ -5038,7 +5042,7 @@ again:
* not reach disk, it still affects our page caches.
*/
zero_start = max_t(u64, folio_pos(folio), start);
- zero_end = min_t(u64, folio_end(folio) - 1, end);
+ zero_end = min_t(u64, folio_next_pos(folio) - 1, end);
} else {
zero_start = max_t(u64, block_start, start);
zero_end = min_t(u64, block_end, end);
@@ -5361,7 +5365,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct rb_node *node;
- ASSERT(inode->i_state & I_FREEING);
+ ASSERT(inode_state_read_once(inode) & I_FREEING);
truncate_inode_pages_final(&inode->i_data);
btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (u64)-1, false);
@@ -5799,7 +5803,7 @@ struct btrfs_inode *btrfs_iget_path(u64 ino, struct btrfs_root *root,
if (!inode)
return ERR_PTR(-ENOMEM);
- if (!(inode->vfs_inode.i_state & I_NEW))
+ if (!(inode_state_read_once(&inode->vfs_inode) & I_NEW))
return inode;
ret = btrfs_read_locked_inode(inode, path);
@@ -5823,7 +5827,7 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root)
if (!inode)
return ERR_PTR(-ENOMEM);
- if (!(inode->vfs_inode.i_state & I_NEW))
+ if (!(inode_state_read_once(&inode->vfs_inode) & I_NEW))
return inode;
path = btrfs_alloc_path();
@@ -5837,6 +5841,8 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root)
if (ret)
return ERR_PTR(ret);
+ if (S_ISDIR(inode->vfs_inode.i_mode))
+ inode->vfs_inode.i_opflags |= IOP_FASTPERM_MAY_EXEC;
unlock_new_inode(&inode->vfs_inode);
return inode;
}
@@ -6289,8 +6295,8 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode)
}
/*
- * This is a copy of file_update_time. We need this so we can return error on
- * ENOSPC for updating the inode in the case of file write and mmap writes.
+ * We need our own ->update_time so that we can return error on ENOSPC for
+ * updating the inode in the case of file write and mmap writes.
*/
static int btrfs_update_time(struct inode *inode, int flags)
{
@@ -6788,8 +6794,11 @@ static int btrfs_create_common(struct inode *dir, struct dentry *dentry,
}
ret = btrfs_create_new_inode(trans, &new_inode_args);
- if (!ret)
+ if (!ret) {
+ if (S_ISDIR(inode->i_mode))
+ inode->i_opflags |= IOP_FASTPERM_MAY_EXEC;
d_instantiate_new(dentry, inode);
+ }
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
@@ -6873,7 +6882,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
BTRFS_I(inode)->dir_index = 0ULL;
inode_inc_iversion(inode);
inode_set_ctime_current(inode);
- set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
&fname.disk_name, 1, index);
@@ -7480,7 +7488,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
u64 page_start = folio_pos(folio);
u64 page_end = page_start + folio_size(folio) - 1;
u64 cur;
- int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
+ int inode_evicting = inode_state_read_once(&inode->vfs_inode) & I_FREEING;
/*
* We have folio locked so no new ordered extent can be created on this
@@ -8709,15 +8717,13 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
-static int start_delalloc_inodes(struct btrfs_root *root,
- struct writeback_control *wbc, bool snapshot,
- bool in_reclaim_context)
+static int start_delalloc_inodes(struct btrfs_root *root, long *nr_to_write,
+ bool snapshot, bool in_reclaim_context)
{
struct btrfs_delalloc_work *work, *next;
LIST_HEAD(works);
LIST_HEAD(splice);
int ret = 0;
- bool full_flush = wbc->nr_to_write == LONG_MAX;
mutex_lock(&root->delalloc_mutex);
spin_lock(&root->delalloc_lock);
@@ -8743,10 +8749,10 @@ static int start_delalloc_inodes(struct btrfs_root *root,
if (snapshot)
set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &inode->runtime_flags);
- if (full_flush) {
- work = btrfs_alloc_delalloc_work(&inode->vfs_inode);
+ if (nr_to_write == NULL) {
+ work = btrfs_alloc_delalloc_work(tmp_inode);
if (!work) {
- iput(&inode->vfs_inode);
+ iput(tmp_inode);
ret = -ENOMEM;
goto out;
}
@@ -8754,9 +8760,11 @@ static int start_delalloc_inodes(struct btrfs_root *root,
btrfs_queue_work(root->fs_info->flush_workers,
&work->work);
} else {
- ret = filemap_fdatawrite_wbc(inode->vfs_inode.i_mapping, wbc);
+ ret = filemap_flush_nr(tmp_inode->i_mapping,
+ nr_to_write);
btrfs_add_delayed_iput(inode);
- if (ret || wbc->nr_to_write <= 0)
+
+ if (ret || *nr_to_write <= 0)
goto out;
}
cond_resched();
@@ -8782,29 +8790,17 @@ out:
int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
{
- struct writeback_control wbc = {
- .nr_to_write = LONG_MAX,
- .sync_mode = WB_SYNC_NONE,
- .range_start = 0,
- .range_end = LLONG_MAX,
- };
struct btrfs_fs_info *fs_info = root->fs_info;
if (BTRFS_FS_ERROR(fs_info))
return -EROFS;
-
- return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
+ return start_delalloc_inodes(root, NULL, true, in_reclaim_context);
}
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
bool in_reclaim_context)
{
- struct writeback_control wbc = {
- .nr_to_write = nr,
- .sync_mode = WB_SYNC_NONE,
- .range_start = 0,
- .range_end = LLONG_MAX,
- };
+ long *nr_to_write = nr == LONG_MAX ? NULL : &nr;
struct btrfs_root *root;
LIST_HEAD(splice);
int ret;
@@ -8816,13 +8812,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
spin_lock(&fs_info->delalloc_root_lock);
list_splice_init(&fs_info->delalloc_roots, &splice);
while (!list_empty(&splice)) {
- /*
- * Reset nr_to_write here so we know that we're doing a full
- * flush.
- */
- if (nr == LONG_MAX)
- wbc.nr_to_write = LONG_MAX;
-
root = list_first_entry(&splice, struct btrfs_root,
delalloc_root);
root = btrfs_grab_root(root);
@@ -8831,9 +8820,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
&fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
- ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context);
+ ret = start_delalloc_inodes(root, nr_to_write, false,
+ in_reclaim_context);
btrfs_put_root(root);
- if (ret < 0 || wbc.nr_to_write <= 0)
+ if (ret < 0 || nr <= 0)
goto out;
spin_lock(&fs_info->delalloc_root_lock);
}
@@ -9169,6 +9159,11 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
min_size, actual_len, alloc_hint, trans);
}
+/*
+ * NOTE: in case you are adding MAY_EXEC check for directories:
+ * we are marking them with IOP_FASTPERM_MAY_EXEC, allowing path lookup to
+ * elide calls here.
+ */
static int btrfs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8cb7d5a462ef..b138120feba3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -904,14 +904,9 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len);
int ret;
- ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
- if (ret == -EINTR)
- return ret;
-
- dentry = lookup_one(idmap, qname, parent);
- ret = PTR_ERR(dentry);
+ dentry = start_creating_killable(idmap, parent, qname);
if (IS_ERR(dentry))
- goto out_unlock;
+ return PTR_ERR(dentry);
ret = btrfs_may_create(idmap, dir, dentry);
if (ret)
@@ -940,9 +935,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
out_up_read:
up_read(&fs_info->subvol_sem);
out_dput:
- dput(dentry);
-out_unlock:
- btrfs_inode_unlock(BTRFS_I(dir), 0);
+ end_creating(dentry);
return ret;
}
@@ -2417,18 +2410,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto free_subvol_name;
}
- ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
- if (ret == -EINTR)
- goto free_subvol_name;
- dentry = lookup_one(idmap, &QSTR(subvol_name), parent);
+ dentry = start_removing_killable(idmap, parent, &QSTR(subvol_name));
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
- goto out_unlock_dir;
- }
-
- if (d_really_is_negative(dentry)) {
- ret = -ENOENT;
- goto out_dput;
+ goto out_end_removing;
}
inode = d_inode(dentry);
@@ -2449,7 +2434,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
*/
ret = -EPERM;
if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
- goto out_dput;
+ goto out_end_removing;
/*
* Do not allow deletion if the parent dir is the same
@@ -2460,21 +2445,21 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
*/
ret = -EINVAL;
if (root == dest)
- goto out_dput;
+ goto out_end_removing;
ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC);
if (ret)
- goto out_dput;
+ goto out_end_removing;
}
/* check if subvolume may be deleted by a user */
ret = btrfs_may_delete(idmap, dir, dentry, 1);
if (ret)
- goto out_dput;
+ goto out_end_removing;
if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
ret = -EINVAL;
- goto out_dput;
+ goto out_end_removing;
}
btrfs_inode_lock(BTRFS_I(inode), 0);
@@ -2483,10 +2468,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (!ret)
d_delete_notify(dir, dentry);
-out_dput:
- dput(dentry);
-out_unlock_dir:
- btrfs_inode_unlock(BTRFS_I(dir), 0);
+out_end_removing:
+ end_removing(dentry);
free_subvol_name:
kfree(subvol_name_ptr);
free_parent:
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 60f9b000d644..17b71e1285e5 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -209,9 +209,4 @@ static inline bool bitmap_test_range_all_zero(const unsigned long *addr,
return (found_set == start + nbits);
}
-static inline u64 folio_end(struct folio *folio)
-{
- return folio_pos(folio) + folio_size(folio);
-}
-
#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2829f20d7bb5..7fedebbee558 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -359,7 +359,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
if (folio) {
ASSERT(folio->mapping);
ASSERT(folio_pos(folio) <= file_offset);
- ASSERT(file_offset + len <= folio_end(folio));
+ ASSERT(file_offset + len <= folio_next_pos(folio));
/*
* Ordered flag indicates whether we still have
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1175b8192cd7..31ad8580322a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1539,8 +1539,10 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst
ASSERT(prealloc);
/* Check the level of src and dst first */
- if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
+ if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) {
+ kfree(prealloc);
return -EINVAL;
+ }
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root) {
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 651b11884f82..ba20d9286a34 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2203,6 +2203,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
&length, &bioc, NULL, NULL);
if (ret < 0) {
+ bio_put(bio);
btrfs_put_bioc(bioc);
btrfs_bio_counter_dec(fs_info);
goto out;
@@ -2212,6 +2213,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
btrfs_put_bioc(bioc);
if (!rbio) {
ret = -ENOMEM;
+ bio_put(bio);
btrfs_bio_counter_dec(fs_info);
goto out;
}
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 5ca8d4db6722..a7ba868e9372 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -186,7 +186,8 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
* unmapped page like dummy extent buffer pages.
*/
if (folio->mapping)
- ASSERT(folio_pos(folio) <= start && start + len <= folio_end(folio),
+ ASSERT(folio_pos(folio) <= start &&
+ start + len <= folio_next_pos(folio),
"start=%llu len=%u folio_pos=%llu folio_size=%zu",
start, len, folio_pos(folio), folio_size(folio));
}
@@ -217,7 +218,7 @@ static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
if (folio_pos(folio) >= orig_start + orig_len)
*len = 0;
else
- *len = min_t(u64, folio_end(folio), orig_start + orig_len) - *start;
+ *len = min_t(u64, folio_next_pos(folio), orig_start + orig_len) - *start;
}
static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 621e0df097e3..30f3c3b849c1 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -7122,7 +7122,7 @@ log_extents:
* a power failure unless the log was synced as part of an fsync
* against any other unrelated inode.
*/
- if (inode_only != LOG_INODE_EXISTS)
+ if (!ctx->logging_new_name && inode_only != LOG_INODE_EXISTS)
inode->last_log_commit = inode->last_sub_trans;
spin_unlock(&inode->lock);
@@ -7910,6 +7910,9 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
bool log_pinned = false;
int ret;
+ /* The inode has a new name (ref/extref), so make sure we log it. */
+ set_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags);
+
btrfs_init_log_ctx(&ctx, inode);
ctx.logging_new_name = true;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2bec544d8ba3..cc8aa4a04348 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2002,14 +2002,11 @@ out:
static void update_dev_time(const char *device_path)
{
struct path path;
- int ret;
-
- ret = kern_path(device_path, LOOKUP_FOLLOW, &path);
- if (ret)
- return;
- inode_update_time(d_inode(path.dentry), S_MTIME | S_CTIME | S_VERSION);
- path_put(&path);
+ if (!kern_path(device_path, LOOKUP_FOLLOW, &path)) {
+ vfs_utimes(&path, NULL);
+ path_put(&path);
+ }
}
static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
@@ -4660,12 +4657,12 @@ static int balance_kthread(void *data)
struct btrfs_fs_info *fs_info = data;
int ret = 0;
- sb_start_write(fs_info->sb);
+ guard(super_write)(fs_info->sb);
+
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl)
ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
mutex_unlock(&fs_info->balance_mutex);
- sb_end_write(fs_info->sb);
return ret;
}
@@ -8177,12 +8174,12 @@ static int relocating_repair_kthread(void *data)
target = cache->start;
btrfs_put_block_group(cache);
- sb_start_write(fs_info->sb);
+ guard(super_write)(fs_info->sb);
+
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
btrfs_info(fs_info,
"zoned: skip relocating block group %llu to repair: EBUSY",
target);
- sb_end_write(fs_info->sb);
return -EBUSY;
}
@@ -8210,7 +8207,6 @@ out:
btrfs_put_block_group(cache);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_exclop_finish(fs_info);
- sb_end_write(fs_info->sb);
return ret;
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 0ea0df18a8e4..d1db7fa1fe58 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1317,6 +1317,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
if (!btrfs_dev_is_sequential(device, info->physical)) {
up_read(&dev_replace->rwsem);
info->alloc_offset = WP_CONVENTIONAL;
+ info->capacity = device->zone_info->zone_size;
return 0;
}
@@ -1522,6 +1523,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
+ u64 stripe_nr = 0, stripe_offset = 0;
+ u32 stripe_index = 0;
if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
@@ -1529,28 +1532,26 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
return -EINVAL;
}
+ if (last_alloc) {
+ u32 factor = map->num_stripes;
+
+ stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT;
+ stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK;
+ stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
+ }
+
for (int i = 0; i < map->num_stripes; i++) {
if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
- u64 stripe_nr, full_stripe_nr;
- u64 stripe_offset;
- int stripe_index;
- stripe_nr = div64_u64(last_alloc, map->stripe_size);
- stripe_offset = stripe_nr * map->stripe_size;
- full_stripe_nr = div_u64(stripe_nr, map->num_stripes);
- div_u64_rem(stripe_nr, map->num_stripes, &stripe_index);
-
- zone_info[i].alloc_offset =
- full_stripe_nr * map->stripe_size;
+ zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr);
if (stripe_index > i)
- zone_info[i].alloc_offset += map->stripe_size;
+ zone_info[i].alloc_offset += BTRFS_STRIPE_LEN;
else if (stripe_index == i)
- zone_info[i].alloc_offset +=
- (last_alloc - stripe_offset);
+ zone_info[i].alloc_offset += stripe_offset;
}
if (test_bit(0, active) != test_bit(i, active)) {
@@ -1574,6 +1575,8 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
+ u64 stripe_nr = 0, stripe_offset = 0;
+ u32 stripe_index = 0;
if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
@@ -1581,6 +1584,14 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
return -EINVAL;
}
+ if (last_alloc) {
+ u32 factor = map->num_stripes / map->sub_stripes;
+
+ stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT;
+ stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK;
+ stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
+ }
+
for (int i = 0; i < map->num_stripes; i++) {
if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
@@ -1594,26 +1605,12 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
}
if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
- u64 stripe_nr, full_stripe_nr;
- u64 stripe_offset;
- int stripe_index;
-
- stripe_nr = div64_u64(last_alloc, map->stripe_size);
- stripe_offset = stripe_nr * map->stripe_size;
- full_stripe_nr = div_u64(stripe_nr,
- map->num_stripes / map->sub_stripes);
- div_u64_rem(stripe_nr,
- (map->num_stripes / map->sub_stripes),
- &stripe_index);
-
- zone_info[i].alloc_offset =
- full_stripe_nr * map->stripe_size;
+ zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr);
if (stripe_index > (i / map->sub_stripes))
- zone_info[i].alloc_offset += map->stripe_size;
+ zone_info[i].alloc_offset += BTRFS_STRIPE_LEN;
else if (stripe_index == (i / map->sub_stripes))
- zone_info[i].alloc_offset +=
- (last_alloc - stripe_offset);
+ zone_info[i].alloc_offset += stripe_offset;
}
if ((i % map->sub_stripes) == 0) {
@@ -1683,8 +1680,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
set_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags);
if (num_conventional > 0) {
- /* Zone capacity is always zone size in emulation */
- cache->zone_capacity = cache->length;
ret = calculate_alloc_pointer(cache, &last_alloc, new);
if (ret) {
btrfs_err(fs_info,
@@ -1693,6 +1688,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
} else if (map->num_stripes == num_conventional) {
cache->alloc_offset = last_alloc;
+ cache->zone_capacity = cache->length;
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
goto out;
}