diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/block-group.c | 10 | ||||
| -rw-r--r-- | fs/btrfs/compression.h | 4 | ||||
| -rw-r--r-- | fs/btrfs/defrag.c | 14 | ||||
| -rw-r--r-- | fs/btrfs/delayed-inode.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/delayed-inode.h | 7 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 31 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 19 | ||||
| -rw-r--r-- | fs/btrfs/free-space-tree.c | 15 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 89 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 43 | ||||
| -rw-r--r-- | fs/btrfs/misc.h | 5 | ||||
| -rw-r--r-- | fs/btrfs/ordered-data.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/qgroup.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/ref-verify.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/relocation.c | 13 | ||||
| -rw-r--r-- | fs/btrfs/scrub.c | 6 | ||||
| -rw-r--r-- | fs/btrfs/send.c | 60 | ||||
| -rw-r--r-- | fs/btrfs/subpage.c | 5 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 11 | ||||
| -rw-r--r-- | fs/btrfs/tree-checker.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/tree-log.c | 5 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 20 | ||||
| -rw-r--r-- | fs/btrfs/zoned.c | 62 |
23 files changed, 236 insertions, 195 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5322ef2ae015..08cdda47509f 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1850,12 +1850,10 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) if (!btrfs_should_reclaim(fs_info)) return; - sb_start_write(fs_info->sb); + guard(super_write)(fs_info->sb); - if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { - sb_end_write(fs_info->sb); + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) return; - } /* * Long running balances can keep us blocked here for eternity, so @@ -1863,7 +1861,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) */ if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) { btrfs_exclop_finish(fs_info); - sb_end_write(fs_info->sb); return; } @@ -1947,7 +1944,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) /* * Get out fast, in case we're read-only or unmounting the * filesystem. It is OK to drop block groups from the list even - * for the read-only case. As we did sb_start_write(), + * for the read-only case. As we did take the super write lock, * "mount -o remount,ro" won't happen and read-only filesystem * means it is forced read-only due to a fatal error. So, it * never gets back to read-write to let us reclaim again. @@ -2030,7 +2027,6 @@ end: list_splice_tail(&retry_list, &fs_info->reclaim_bgs); spin_unlock(&fs_info->unused_bgs_lock); btrfs_exclop_finish(fs_info); - sb_end_write(fs_info->sb); } void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index eba188a9e3bb..aee1fd21cdd6 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -85,8 +85,8 @@ static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u6 { /* @cur must be inside the folio. */ ASSERT(folio_pos(folio) <= cur); - ASSERT(cur < folio_end(folio)); - return min(range_end, folio_end(folio)) - cur; + ASSERT(cur < folio_next_pos(folio)); + return umin(range_end, folio_next_pos(folio)) - cur; } int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index 7b277934f66f..a7f20f048398 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -254,10 +254,9 @@ again: range.extent_thresh = defrag->extent_thresh; file_ra_state_init(ra, inode->vfs_inode.i_mapping); - sb_start_write(fs_info->sb); - ret = btrfs_defrag_file(inode, ra, &range, defrag->transid, - BTRFS_DEFRAG_BATCH); - sb_end_write(fs_info->sb); + scoped_guard(super_write, fs_info->sb) + ret = btrfs_defrag_file(inode, ra, &range, + defrag->transid, BTRFS_DEFRAG_BATCH); iput(&inode->vfs_inode); if (ret < 0) @@ -886,7 +885,7 @@ again: } lock_start = folio_pos(folio); - lock_end = folio_end(folio) - 1; + lock_end = folio_next_pos(folio) - 1; /* Wait for any existing ordered extent in the range */ while (1) { struct btrfs_ordered_extent *ordered; @@ -1178,7 +1177,8 @@ static int defrag_one_locked_target(struct btrfs_inode *inode, if (!folio) break; - if (start >= folio_end(folio) || start + len <= folio_pos(folio)) + if (start >= folio_next_pos(folio) || + start + len <= folio_pos(folio)) continue; btrfs_folio_clamp_clear_checked(fs_info, folio, start, len); btrfs_folio_clamp_set_dirty(fs_info, folio, start, len); @@ -1219,7 +1219,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len, folios[i] = NULL; goto free_folios; } - cur = folio_end(folios[i]); + cur = folio_next_pos(folios[i]); } for (int i = 0; i < nr_pages; i++) { if (!folios[i]) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 41e37f7f67cc..3df7b9d7fbe8 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -2110,9 +2110,9 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) for (int i = 0; i < count; i++) { __btrfs_kill_delayed_node(delayed_nodes[i]); + btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]); btrfs_release_delayed_node(delayed_nodes[i], &delayed_node_trackers[i]); - btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]); } } } diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 0d949edc0caf..b09d4ec8c77d 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -219,6 +219,13 @@ static inline void btrfs_delayed_node_ref_tracker_dir_print(struct btrfs_delayed if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER)) return; + /* + * Only print if there are leaked references. The caller is + * holding one reference, so if refs == 1 there is no leak. + */ + if (refcount_read(&node->refs) == 1) + return; + ref_tracker_dir_print(&node->ref_dir.dir, BTRFS_DELAYED_NODE_REF_TRACKER_DISPLAY_LIMIT); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c123a3ef154a..7361d5d890d2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -333,7 +333,7 @@ static noinline int lock_delalloc_folios(struct inode *inode, goto out; } range_start = max_t(u64, folio_pos(folio), start); - range_len = min_t(u64, folio_end(folio), end + 1) - range_start; + range_len = min_t(u64, folio_next_pos(folio), end + 1) - range_start; btrfs_folio_set_lock(fs_info, folio, range_start, range_len); processed_end = range_start + range_len - 1; @@ -387,7 +387,7 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, ASSERT(orig_end > orig_start); /* The range should at least cover part of the folio */ - ASSERT(!(orig_start >= folio_end(locked_folio) || + ASSERT(!(orig_start >= folio_next_pos(locked_folio) || orig_end <= folio_pos(locked_folio))); again: /* step one, find a bunch of delalloc bytes starting at start */ @@ -493,7 +493,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); ASSERT(folio_pos(folio) <= start && - start + len <= folio_end(folio)); + start + len <= folio_next_pos(folio)); if (uptodate && btrfs_verify_folio(folio, start, len)) btrfs_folio_set_uptodate(fs_info, folio, start, len); @@ -973,7 +973,7 @@ static void btrfs_readahead_expand(struct readahead_control *ractl, { const u64 ra_pos = readahead_pos(ractl); const u64 ra_end = ra_pos + readahead_length(ractl); - const u64 em_end = em->start + em->ram_bytes; + const u64 em_end = em->start + em->len; /* No expansion for holes and inline extents. */ if (em->disk_bytenr > EXTENT_MAP_LAST_BYTE) @@ -1201,7 +1201,7 @@ static bool can_skip_one_ordered_range(struct btrfs_inode *inode, * finished our folio read and unlocked the folio. */ if (btrfs_folio_test_dirty(fs_info, folio, cur, blocksize)) { - u64 range_len = min(folio_end(folio), + u64 range_len = umin(folio_next_pos(folio), ordered->file_offset + ordered->num_bytes) - cur; ret = true; @@ -1223,7 +1223,7 @@ static bool can_skip_one_ordered_range(struct btrfs_inode *inode, * So we return true and update @next_ret to the OE/folio boundary. */ if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) { - u64 range_len = min(folio_end(folio), + u64 range_len = umin(folio_next_pos(folio), ordered->file_offset + ordered->num_bytes) - cur; /* @@ -2215,7 +2215,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb, for (int i = 0; i < num_extent_folios(eb); i++) { struct folio *folio = eb->folios[i]; u64 range_start = max_t(u64, eb->start, folio_pos(folio)); - u32 range_len = min_t(u64, folio_end(folio), + u32 range_len = min_t(u64, folio_next_pos(folio), eb->start + eb->len) - range_start; folio_lock(folio); @@ -2228,6 +2228,14 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb, wbc_account_cgroup_owner(wbc, folio, range_len); folio_unlock(folio); } + /* + * If the fs is already in error status, do not submit any writeback + * but immediately finish it. + */ + if (unlikely(BTRFS_FS_ERROR(fs_info))) { + btrfs_bio_end_io(bbio, errno_to_blk_status(BTRFS_FS_ERROR(fs_info))); + return; + } btrfs_submit_bbio(bbio, 0); } @@ -2460,10 +2468,7 @@ static int extent_write_cache_pages(struct address_space *mapping, &BTRFS_I(inode)->runtime_flags)) wbc->tagged_writepages = 1; - if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) - tag = PAGECACHE_TAG_TOWRITE; - else - tag = PAGECACHE_TAG_DIRTY; + tag = wbc_to_tag(wbc); retry: if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); @@ -2619,7 +2624,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f continue; } - cur_end = min_t(u64, folio_end(folio) - 1, end); + cur_end = min_t(u64, folio_next_pos(folio) - 1, end); cur_len = cur_end + 1 - cur; ASSERT(folio_test_locked(folio)); @@ -3860,7 +3865,7 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num, for (int i = 0; i < num_extent_folios(eb); i++) { struct folio *folio = eb->folios[i]; u64 range_start = max_t(u64, eb->start, folio_pos(folio)); - u32 range_len = min_t(u64, folio_end(folio), + u32 range_len = min_t(u64, folio_next_pos(folio), eb->start + eb->len) - range_start; bio_add_folio_nofail(&bbio->bio, folio, range_len, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7efd1f8a1912..e7453f992e1e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -89,7 +89,8 @@ int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos num_bytes = round_up(write_bytes + pos - start_pos, fs_info->sectorsize); ASSERT(num_bytes <= U32_MAX); - ASSERT(folio_pos(folio) <= pos && folio_end(folio) >= pos + write_bytes); + ASSERT(folio_pos(folio) <= pos && + folio_next_pos(folio) >= pos + write_bytes); end_of_last_block = start_pos + num_bytes - 1; @@ -799,7 +800,7 @@ static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64 u64 len) { u64 clamp_start = max_t(u64, pos, folio_pos(folio)); - u64 clamp_end = min_t(u64, pos + len, folio_end(folio)); + u64 clamp_end = min_t(u64, pos + len, folio_next_pos(folio)); const u32 blocksize = inode_to_fs_info(inode)->sectorsize; int ret = 0; @@ -1254,8 +1255,8 @@ again: * The reserved range goes beyond the current folio, shrink the reserved * space to the folio boundary. */ - if (reserved_start + reserved_len > folio_end(folio)) { - const u64 last_block = folio_end(folio); + if (reserved_start + reserved_len > folio_next_pos(folio)) { + const u64 last_block = folio_next_pos(folio); shrink_reserved_space(inode, *data_reserved, reserved_start, reserved_len, last_block - reserved_start, @@ -2854,12 +2855,22 @@ static int btrfs_fallocate_update_isize(struct inode *inode, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + u64 range_start; + u64 range_end; int ret; int ret2; if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode)) return 0; + range_start = round_down(i_size_read(inode), root->fs_info->sectorsize); + range_end = round_up(end, root->fs_info->sectorsize); + + ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), range_start, + range_end - range_start); + if (ret) + return ret; + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) return PTR_ERR(trans); diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index dad0b492a663..d86541073d42 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1106,14 +1106,15 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, * If ret is 1 (no key found), it means this is an empty block group, * without any extents allocated from it and there's no block group * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree - * because we are using the block group tree feature, so block group - * items are stored in the block group tree. It also means there are no - * extents allocated for block groups with a start offset beyond this - * block group's end offset (this is the last, highest, block group). + * because we are using the block group tree feature (so block group + * items are stored in the block group tree) or this is a new block + * group created in the current transaction and its block group item + * was not yet inserted in the extent tree (that happens in + * btrfs_create_pending_block_groups() -> insert_block_group_item()). + * It also means there are no extents allocated for block groups with a + * start offset beyond this block group's end offset (this is the last, + * highest, block group). */ - if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE)) - ASSERT(ret == 0); - start = block_group->start; end = block_group->start + block_group->length; while (ret == 0) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b1b3a0553ee..9c6ca87b3d56 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9,6 +9,7 @@ #include <linux/blk-cgroup.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/fs_struct.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/time.h> @@ -177,8 +178,10 @@ static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, return ret; } ret = paths_from_inode(inum, ipath); - if (ret < 0) + if (ret < 0) { + btrfs_put_root(local_root); goto err; + } /* * We deliberately ignore the bit ipath might have been too small to @@ -409,7 +412,7 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, continue; } - index = folio_end(folio) >> PAGE_SHIFT; + index = folio_next_index(folio); /* * Here we just clear all Ordered bits for every page in the * range, then btrfs_mark_ordered_io_finished() will handle @@ -2336,7 +2339,8 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol * The range must cover part of the @locked_folio, or a return of 1 * can confuse the caller. */ - ASSERT(!(end <= folio_pos(locked_folio) || start >= folio_end(locked_folio))); + ASSERT(!(end <= folio_pos(locked_folio) || + start >= folio_next_pos(locked_folio))); if (should_nocow(inode, start, end)) { ret = run_delalloc_nocow(inode, locked_folio, start, end); @@ -2743,7 +2747,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) struct btrfs_inode *inode = fixup->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 page_start = folio_pos(folio); - u64 page_end = folio_end(folio) - 1; + u64 page_end = folio_next_pos(folio) - 1; int ret = 0; bool free_delalloc_space = true; @@ -3884,7 +3888,7 @@ static int btrfs_add_inode_to_root(struct btrfs_inode *inode, bool prealloc) ASSERT(ret != -ENOMEM); return ret; } else if (existing) { - WARN_ON(!(existing->vfs_inode.i_state & (I_WILL_FREE | I_FREEING))); + WARN_ON(!(inode_state_read_once(&existing->vfs_inode) & (I_WILL_FREE | I_FREEING))); } return 0; @@ -4855,7 +4859,7 @@ again: */ zero_start = max_t(u64, folio_pos(folio), start); - zero_end = folio_end(folio); + zero_end = folio_next_pos(folio); folio_zero_range(folio, zero_start - folio_pos(folio), zero_end - zero_start); @@ -5038,7 +5042,7 @@ again: * not reach disk, it still affects our page caches. */ zero_start = max_t(u64, folio_pos(folio), start); - zero_end = min_t(u64, folio_end(folio) - 1, end); + zero_end = min_t(u64, folio_next_pos(folio) - 1, end); } else { zero_start = max_t(u64, block_start, start); zero_end = min_t(u64, block_end, end); @@ -5361,7 +5365,7 @@ static void evict_inode_truncate_pages(struct inode *inode) struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct rb_node *node; - ASSERT(inode->i_state & I_FREEING); + ASSERT(inode_state_read_once(inode) & I_FREEING); truncate_inode_pages_final(&inode->i_data); btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (u64)-1, false); @@ -5799,7 +5803,7 @@ struct btrfs_inode *btrfs_iget_path(u64 ino, struct btrfs_root *root, if (!inode) return ERR_PTR(-ENOMEM); - if (!(inode->vfs_inode.i_state & I_NEW)) + if (!(inode_state_read_once(&inode->vfs_inode) & I_NEW)) return inode; ret = btrfs_read_locked_inode(inode, path); @@ -5823,7 +5827,7 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root) if (!inode) return ERR_PTR(-ENOMEM); - if (!(inode->vfs_inode.i_state & I_NEW)) + if (!(inode_state_read_once(&inode->vfs_inode) & I_NEW)) return inode; path = btrfs_alloc_path(); @@ -5837,6 +5841,8 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root) if (ret) return ERR_PTR(ret); + if (S_ISDIR(inode->vfs_inode.i_mode)) + inode->vfs_inode.i_opflags |= IOP_FASTPERM_MAY_EXEC; unlock_new_inode(&inode->vfs_inode); return inode; } @@ -6289,8 +6295,8 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode) } /* - * This is a copy of file_update_time. We need this so we can return error on - * ENOSPC for updating the inode in the case of file write and mmap writes. + * We need our own ->update_time so that we can return error on ENOSPC for + * updating the inode in the case of file write and mmap writes. */ static int btrfs_update_time(struct inode *inode, int flags) { @@ -6788,8 +6794,11 @@ static int btrfs_create_common(struct inode *dir, struct dentry *dentry, } ret = btrfs_create_new_inode(trans, &new_inode_args); - if (!ret) + if (!ret) { + if (S_ISDIR(inode->i_mode)) + inode->i_opflags |= IOP_FASTPERM_MAY_EXEC; d_instantiate_new(dentry, inode); + } btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); @@ -6873,7 +6882,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, BTRFS_I(inode)->dir_index = 0ULL; inode_inc_iversion(inode); inode_set_ctime_current(inode); - set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), &fname.disk_name, 1, index); @@ -7480,7 +7488,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset, u64 page_start = folio_pos(folio); u64 page_end = page_start + folio_size(folio) - 1; u64 cur; - int inode_evicting = inode->vfs_inode.i_state & I_FREEING; + int inode_evicting = inode_state_read_once(&inode->vfs_inode) & I_FREEING; /* * We have folio locked so no new ordered extent can be created on this @@ -8709,15 +8717,13 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -static int start_delalloc_inodes(struct btrfs_root *root, - struct writeback_control *wbc, bool snapshot, - bool in_reclaim_context) +static int start_delalloc_inodes(struct btrfs_root *root, long *nr_to_write, + bool snapshot, bool in_reclaim_context) { struct btrfs_delalloc_work *work, *next; LIST_HEAD(works); LIST_HEAD(splice); int ret = 0; - bool full_flush = wbc->nr_to_write == LONG_MAX; mutex_lock(&root->delalloc_mutex); spin_lock(&root->delalloc_lock); @@ -8743,10 +8749,10 @@ static int start_delalloc_inodes(struct btrfs_root *root, if (snapshot) set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &inode->runtime_flags); - if (full_flush) { - work = btrfs_alloc_delalloc_work(&inode->vfs_inode); + if (nr_to_write == NULL) { + work = btrfs_alloc_delalloc_work(tmp_inode); if (!work) { - iput(&inode->vfs_inode); + iput(tmp_inode); ret = -ENOMEM; goto out; } @@ -8754,9 +8760,11 @@ static int start_delalloc_inodes(struct btrfs_root *root, btrfs_queue_work(root->fs_info->flush_workers, &work->work); } else { - ret = filemap_fdatawrite_wbc(inode->vfs_inode.i_mapping, wbc); + ret = filemap_flush_nr(tmp_inode->i_mapping, + nr_to_write); btrfs_add_delayed_iput(inode); - if (ret || wbc->nr_to_write <= 0) + + if (ret || *nr_to_write <= 0) goto out; } cond_resched(); @@ -8782,29 +8790,17 @@ out: int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context) { - struct writeback_control wbc = { - .nr_to_write = LONG_MAX, - .sync_mode = WB_SYNC_NONE, - .range_start = 0, - .range_end = LLONG_MAX, - }; struct btrfs_fs_info *fs_info = root->fs_info; if (BTRFS_FS_ERROR(fs_info)) return -EROFS; - - return start_delalloc_inodes(root, &wbc, true, in_reclaim_context); + return start_delalloc_inodes(root, NULL, true, in_reclaim_context); } int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, bool in_reclaim_context) { - struct writeback_control wbc = { - .nr_to_write = nr, - .sync_mode = WB_SYNC_NONE, - .range_start = 0, - .range_end = LLONG_MAX, - }; + long *nr_to_write = nr == LONG_MAX ? NULL : &nr; struct btrfs_root *root; LIST_HEAD(splice); int ret; @@ -8816,13 +8812,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, spin_lock(&fs_info->delalloc_root_lock); list_splice_init(&fs_info->delalloc_roots, &splice); while (!list_empty(&splice)) { - /* - * Reset nr_to_write here so we know that we're doing a full - * flush. - */ - if (nr == LONG_MAX) - wbc.nr_to_write = LONG_MAX; - root = list_first_entry(&splice, struct btrfs_root, delalloc_root); root = btrfs_grab_root(root); @@ -8831,9 +8820,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, &fs_info->delalloc_roots); spin_unlock(&fs_info->delalloc_root_lock); - ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context); + ret = start_delalloc_inodes(root, nr_to_write, false, + in_reclaim_context); btrfs_put_root(root); - if (ret < 0 || wbc.nr_to_write <= 0) + if (ret < 0 || nr <= 0) goto out; spin_lock(&fs_info->delalloc_root_lock); } @@ -9169,6 +9159,11 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, min_size, actual_len, alloc_hint, trans); } +/* + * NOTE: in case you are adding MAY_EXEC check for directories: + * we are marking them with IOP_FASTPERM_MAY_EXEC, allowing path lookup to + * elide calls here. + */ static int btrfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 185bef0df1c2..b138120feba3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -904,14 +904,9 @@ static noinline int btrfs_mksubvol(struct dentry *parent, struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len); int ret; - ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); - if (ret == -EINTR) - return ret; - - dentry = lookup_one(idmap, qname, parent); - ret = PTR_ERR(dentry); + dentry = start_creating_killable(idmap, parent, qname); if (IS_ERR(dentry)) - goto out_unlock; + return PTR_ERR(dentry); ret = btrfs_may_create(idmap, dir, dentry); if (ret) @@ -940,9 +935,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent, out_up_read: up_read(&fs_info->subvol_sem); out_dput: - dput(dentry); -out_unlock: - btrfs_inode_unlock(BTRFS_I(dir), 0); + end_creating(dentry); return ret; } @@ -2417,18 +2410,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto free_subvol_name; } - ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); - if (ret == -EINTR) - goto free_subvol_name; - dentry = lookup_one(idmap, &QSTR(subvol_name), parent); + dentry = start_removing_killable(idmap, parent, &QSTR(subvol_name)); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); - goto out_unlock_dir; - } - - if (d_really_is_negative(dentry)) { - ret = -ENOENT; - goto out_dput; + goto out_end_removing; } inode = d_inode(dentry); @@ -2449,7 +2434,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, */ ret = -EPERM; if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED)) - goto out_dput; + goto out_end_removing; /* * Do not allow deletion if the parent dir is the same @@ -2460,21 +2445,21 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, */ ret = -EINVAL; if (root == dest) - goto out_dput; + goto out_end_removing; ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC); if (ret) - goto out_dput; + goto out_end_removing; } /* check if subvolume may be deleted by a user */ ret = btrfs_may_delete(idmap, dir, dentry, 1); if (ret) - goto out_dput; + goto out_end_removing; if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { ret = -EINVAL; - goto out_dput; + goto out_end_removing; } btrfs_inode_lock(BTRFS_I(inode), 0); @@ -2483,10 +2468,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, if (!ret) d_delete_notify(dir, dentry); -out_dput: - dput(dentry); -out_unlock_dir: - btrfs_inode_unlock(BTRFS_I(dir), 0); +out_end_removing: + end_removing(dentry); free_subvol_name: kfree(subvol_name_ptr); free_parent: @@ -3740,7 +3723,7 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); if (!prealloc) { ret = -ENOMEM; - goto drop_write; + goto out; } } diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h index 60f9b000d644..17b71e1285e5 100644 --- a/fs/btrfs/misc.h +++ b/fs/btrfs/misc.h @@ -209,9 +209,4 @@ static inline bool bitmap_test_range_all_zero(const unsigned long *addr, return (found_set == start + nbits); } -static inline u64 folio_end(struct folio *folio) -{ - return folio_pos(folio) + folio_size(folio); -} - #endif diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2829f20d7bb5..7fedebbee558 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -359,7 +359,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered, if (folio) { ASSERT(folio->mapping); ASSERT(folio_pos(folio) <= file_offset); - ASSERT(file_offset + len <= folio_end(folio)); + ASSERT(file_offset + len <= folio_next_pos(folio)); /* * Ordered flag indicates whether we still have diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1175b8192cd7..31ad8580322a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1539,8 +1539,10 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst ASSERT(prealloc); /* Check the level of src and dst first */ - if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) + if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) { + kfree(prealloc); return -EINVAL; + } mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) { diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index de4cb0f3fbd0..e9224145d754 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -982,7 +982,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) extent_root = btrfs_extent_root(fs_info, 0); /* If the extent tree is damaged we cannot ignore it (IGNOREBADROOTS). */ - if (IS_ERR(extent_root)) { + if (!extent_root) { btrfs_warn(fs_info, "ref-verify: extent tree not available, disabling"); btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); return 0; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8dd8de6b9fb8..0765e06d00b8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3780,6 +3780,7 @@ out: /* * Mark start of chunk relocation that is cancellable. Check if the cancellation * has been requested meanwhile and don't start in that case. + * NOTE: if this returns an error, reloc_chunk_end() must not be called. * * Return: * 0 success @@ -3796,10 +3797,8 @@ static int reloc_chunk_start(struct btrfs_fs_info *fs_info) if (atomic_read(&fs_info->reloc_cancel_req) > 0) { btrfs_info(fs_info, "chunk relocation canceled on start"); - /* - * On cancel, clear all requests but let the caller mark - * the end after cleanup operations. - */ + /* On cancel, clear all requests. */ + clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags); atomic_set(&fs_info->reloc_cancel_req, 0); return -ECANCELED; } @@ -3808,9 +3807,11 @@ static int reloc_chunk_start(struct btrfs_fs_info *fs_info) /* * Mark end of chunk relocation that is cancellable and wake any waiters. + * NOTE: call only if a previous call to reloc_chunk_start() succeeded. */ static void reloc_chunk_end(struct btrfs_fs_info *fs_info) { + ASSERT(test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)); /* Requested after start, clear bit first so any waiters can continue */ if (atomic_read(&fs_info->reloc_cancel_req) > 0) btrfs_info(fs_info, "chunk relocation canceled during operation"); @@ -4023,9 +4024,9 @@ out: if (err && rw) btrfs_dec_block_group_ro(rc->block_group); iput(rc->data_inode); + reloc_chunk_end(fs_info); out_put_bg: btrfs_put_block_group(bg); - reloc_chunk_end(fs_info); free_reloc_control(rc); return err; } @@ -4208,8 +4209,8 @@ out_clean: ret = ret2; out_unset: unset_reloc_control(rc); -out_end: reloc_chunk_end(fs_info); +out_end: free_reloc_control(rc); out: free_reloc_roots(&reloc_roots); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4691d0bdb2e8..ba20d9286a34 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -694,7 +694,7 @@ static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr) /* stripe->folios[] is allocated by us and no highmem is allowed. */ ASSERT(folio); - ASSERT(!folio_test_partial_kmap(folio)); + ASSERT(!folio_test_highmem(folio)); return folio_address(folio) + offset_in_folio(folio, offset); } @@ -707,7 +707,7 @@ static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int secto /* stripe->folios[] is allocated by us and no highmem is allowed. */ ASSERT(folio); - ASSERT(!folio_test_partial_kmap(folio)); + ASSERT(!folio_test_highmem(folio)); /* And the range must be contained inside the folio. */ ASSERT(offset_in_folio(folio, offset) + fs_info->sectorsize <= folio_size(folio)); return page_to_phys(folio_page(folio, 0)) + offset_in_folio(folio, offset); @@ -2203,6 +2203,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start, &length, &bioc, NULL, NULL); if (ret < 0) { + bio_put(bio); btrfs_put_bioc(bioc); btrfs_bio_counter_dec(fs_info); goto out; @@ -2212,6 +2213,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, btrfs_put_bioc(bioc); if (!rbio) { ret = -ENOMEM; + bio_put(bio); btrfs_bio_counter_dec(fs_info); goto out; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9230e5066fc6..96a030d28e09 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -178,7 +178,6 @@ struct send_ctx { u64 cur_inode_rdev; u64 cur_inode_last_extent; u64 cur_inode_next_write_offset; - struct fs_path cur_inode_path; bool cur_inode_new; bool cur_inode_new_gen; bool cur_inode_deleted; @@ -305,6 +304,9 @@ struct send_ctx { struct btrfs_lru_cache dir_created_cache; struct btrfs_lru_cache dir_utimes_cache; + + /* Must be last as it ends in a flexible-array member. */ + struct fs_path cur_inode_path; }; struct pending_dir_move { @@ -4100,6 +4102,48 @@ out: return ret; } +static int rbtree_check_dir_ref_comp(const void *k, const struct rb_node *node) +{ + const struct recorded_ref *data = k; + const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node); + + if (data->dir > ref->dir) + return 1; + if (data->dir < ref->dir) + return -1; + if (data->dir_gen > ref->dir_gen) + return 1; + if (data->dir_gen < ref->dir_gen) + return -1; + return 0; +} + +static bool rbtree_check_dir_ref_less(struct rb_node *node, const struct rb_node *parent) +{ + const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node); + + return rbtree_check_dir_ref_comp(entry, parent) < 0; +} + +static int record_check_dir_ref_in_tree(struct rb_root *root, + struct recorded_ref *ref, struct list_head *list) +{ + struct recorded_ref *tmp_ref; + int ret; + + if (rb_find(ref, root, rbtree_check_dir_ref_comp)) + return 0; + + ret = dup_ref(ref, list); + if (ret < 0) + return ret; + + tmp_ref = list_last_entry(list, struct recorded_ref, list); + rb_add(&tmp_ref->node, root, rbtree_check_dir_ref_less); + tmp_ref->root = root; + return 0; +} + static int rename_current_inode(struct send_ctx *sctx, struct fs_path *current_path, struct fs_path *new_path) @@ -4127,11 +4171,11 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) struct recorded_ref *cur; struct recorded_ref *cur2; LIST_HEAD(check_dirs); + struct rb_root rbtree_check_dirs = RB_ROOT; struct fs_path *valid_path = NULL; u64 ow_inode = 0; u64 ow_gen; u64 ow_mode; - u64 last_dir_ino_rm = 0; bool did_overwrite = false; bool is_orphan = false; bool can_rename = true; @@ -4435,7 +4479,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) goto out; } } - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } @@ -4463,7 +4507,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) } list_for_each_entry(cur, &sctx->deleted_refs, list) { - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } @@ -4473,7 +4517,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) * We have a moved dir. Add the old parent to check_dirs */ cur = list_first_entry(&sctx->deleted_refs, struct recorded_ref, list); - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } else if (!S_ISDIR(sctx->cur_inode_mode)) { @@ -4507,7 +4551,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) if (is_current_inode_path(sctx, cur->full_path)) fs_path_reset(&sctx->cur_inode_path); } - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } @@ -4550,8 +4594,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; - } else if (ret == inode_state_did_delete && - cur->dir != last_dir_ino_rm) { + } else if (ret == inode_state_did_delete) { ret = can_rmdir(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; @@ -4563,7 +4606,6 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) ret = send_rmdir(sctx, valid_path); if (ret < 0) goto out; - last_dir_ino_rm = cur->dir; } } } diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 5ca8d4db6722..a7ba868e9372 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -186,7 +186,8 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, * unmapped page like dummy extent buffer pages. */ if (folio->mapping) - ASSERT(folio_pos(folio) <= start && start + len <= folio_end(folio), + ASSERT(folio_pos(folio) <= start && + start + len <= folio_next_pos(folio), "start=%llu len=%u folio_pos=%llu folio_size=%zu", start, len, folio_pos(folio), folio_size(folio)); } @@ -217,7 +218,7 @@ static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len) if (folio_pos(folio) >= orig_start + orig_len) *len = 0; else - *len = min_t(u64, folio_end(folio), orig_start + orig_len) - *start; + *len = min_t(u64, folio_next_pos(folio), orig_start + orig_len) - *start; } static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d6e496436539..430e7419349c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1900,8 +1900,6 @@ static int btrfs_get_tree_super(struct fs_context *fc) return PTR_ERR(sb); } - set_device_specific_options(fs_info); - if (sb->s_root) { /* * Not the first mount of the fs thus got an existing super block. @@ -1946,6 +1944,7 @@ static int btrfs_get_tree_super(struct fs_context *fc) deactivate_locked_super(sb); return -EACCES; } + set_device_specific_options(fs_info); bdev = fs_devices->latest_dev->bdev; snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id); @@ -2069,7 +2068,13 @@ static int btrfs_get_tree_subvol(struct fs_context *fc) fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); if (!fs_info->super_copy || !fs_info->super_for_commit) { - btrfs_free_fs_info(fs_info); + /* + * Dont call btrfs_free_fs_info() to free it as it's still + * initialized partially. + */ + kfree(fs_info->super_copy); + kfree(fs_info->super_for_commit); + kvfree(fs_info); return -ENOMEM; } btrfs_init_fs_info(fs_info); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ca30b15ea452..c10b4c242acf 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1797,7 +1797,7 @@ static int check_inode_extref(struct extent_buffer *leaf, struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr; u16 namelen; - if (unlikely(ptr + sizeof(*extref)) > end) { + if (unlikely(ptr + sizeof(*extref) > end)) { inode_ref_err(leaf, slot, "inode extref overflow, ptr %lu end %lu inode_extref size %zu", ptr, end, sizeof(*extref)); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 621e0df097e3..30f3c3b849c1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -7122,7 +7122,7 @@ log_extents: * a power failure unless the log was synced as part of an fsync * against any other unrelated inode. */ - if (inode_only != LOG_INODE_EXISTS) + if (!ctx->logging_new_name && inode_only != LOG_INODE_EXISTS) inode->last_log_commit = inode->last_sub_trans; spin_unlock(&inode->lock); @@ -7910,6 +7910,9 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, bool log_pinned = false; int ret; + /* The inode has a new name (ref/extref), so make sure we log it. */ + set_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags); + btrfs_init_log_ctx(&ctx, inode); ctx.logging_new_name = true; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2bec544d8ba3..cc8aa4a04348 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2002,14 +2002,11 @@ out: static void update_dev_time(const char *device_path) { struct path path; - int ret; - - ret = kern_path(device_path, LOOKUP_FOLLOW, &path); - if (ret) - return; - inode_update_time(d_inode(path.dentry), S_MTIME | S_CTIME | S_VERSION); - path_put(&path); + if (!kern_path(device_path, LOOKUP_FOLLOW, &path)) { + vfs_utimes(&path, NULL); + path_put(&path); + } } static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans, @@ -4660,12 +4657,12 @@ static int balance_kthread(void *data) struct btrfs_fs_info *fs_info = data; int ret = 0; - sb_start_write(fs_info->sb); + guard(super_write)(fs_info->sb); + mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL); mutex_unlock(&fs_info->balance_mutex); - sb_end_write(fs_info->sb); return ret; } @@ -8177,12 +8174,12 @@ static int relocating_repair_kthread(void *data) target = cache->start; btrfs_put_block_group(cache); - sb_start_write(fs_info->sb); + guard(super_write)(fs_info->sb); + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { btrfs_info(fs_info, "zoned: skip relocating block group %llu to repair: EBUSY", target); - sb_end_write(fs_info->sb); return -EBUSY; } @@ -8210,7 +8207,6 @@ out: btrfs_put_block_group(cache); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_exclop_finish(fs_info); - sb_end_write(fs_info->sb); return ret; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index e00036672f33..d1db7fa1fe58 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1317,6 +1317,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, if (!btrfs_dev_is_sequential(device, info->physical)) { up_read(&dev_replace->rwsem); info->alloc_offset = WP_CONVENTIONAL; + info->capacity = device->zone_info->zone_size; return 0; } @@ -1522,6 +1523,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; + u64 stripe_nr = 0, stripe_offset = 0; + u32 stripe_index = 0; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", @@ -1529,28 +1532,26 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, return -EINVAL; } + if (last_alloc) { + u32 factor = map->num_stripes; + + stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT; + stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK; + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); + } + for (int i = 0; i < map->num_stripes; i++) { if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { - u64 stripe_nr, full_stripe_nr; - u64 stripe_offset; - int stripe_index; - stripe_nr = div64_u64(last_alloc, map->stripe_size); - stripe_offset = stripe_nr * map->stripe_size; - full_stripe_nr = div_u64(stripe_nr, map->num_stripes); - div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); - - zone_info[i].alloc_offset = - full_stripe_nr * map->stripe_size; + zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr); if (stripe_index > i) - zone_info[i].alloc_offset += map->stripe_size; + zone_info[i].alloc_offset += BTRFS_STRIPE_LEN; else if (stripe_index == i) - zone_info[i].alloc_offset += - (last_alloc - stripe_offset); + zone_info[i].alloc_offset += stripe_offset; } if (test_bit(0, active) != test_bit(i, active)) { @@ -1574,6 +1575,8 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; + u64 stripe_nr = 0, stripe_offset = 0; + u32 stripe_index = 0; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", @@ -1581,6 +1584,14 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, return -EINVAL; } + if (last_alloc) { + u32 factor = map->num_stripes / map->sub_stripes; + + stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT; + stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK; + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); + } + for (int i = 0; i < map->num_stripes; i++) { if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; @@ -1594,26 +1605,12 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, } if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { - u64 stripe_nr, full_stripe_nr; - u64 stripe_offset; - int stripe_index; - - stripe_nr = div64_u64(last_alloc, map->stripe_size); - stripe_offset = stripe_nr * map->stripe_size; - full_stripe_nr = div_u64(stripe_nr, - map->num_stripes / map->sub_stripes); - div_u64_rem(stripe_nr, - (map->num_stripes / map->sub_stripes), - &stripe_index); - - zone_info[i].alloc_offset = - full_stripe_nr * map->stripe_size; + zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr); if (stripe_index > (i / map->sub_stripes)) - zone_info[i].alloc_offset += map->stripe_size; + zone_info[i].alloc_offset += BTRFS_STRIPE_LEN; else if (stripe_index == (i / map->sub_stripes)) - zone_info[i].alloc_offset += - (last_alloc - stripe_offset); + zone_info[i].alloc_offset += stripe_offset; } if ((i % map->sub_stripes) == 0) { @@ -1683,8 +1680,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) set_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); if (num_conventional > 0) { - /* Zone capacity is always zone size in emulation */ - cache->zone_capacity = cache->length; ret = calculate_alloc_pointer(cache, &last_alloc, new); if (ret) { btrfs_err(fs_info, @@ -1693,6 +1688,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } else if (map->num_stripes == num_conventional) { cache->alloc_offset = last_alloc; + cache->zone_capacity = cache->length; set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); goto out; } @@ -1753,7 +1749,7 @@ out: !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", btrfs_bg_type_to_raid_name(map->type)); - return -EINVAL; + ret = -EINVAL; } if (unlikely(cache->alloc_offset > cache->zone_capacity)) { |
