summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/Makefile5
-rw-r--r--fs/ext4/crypto.c9
-rw-r--r--fs/ext4/ext4.h6
-rw-r--r--fs/ext4/ext4_extents.h12
-rw-r--r--fs/ext4/extents-test.c12
-rw-r--r--fs/ext4/extents.c80
-rw-r--r--fs/ext4/fast_commit.c17
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inline.c10
-rw-r--r--fs/ext4/inode.c75
-rw-r--r--fs/ext4/mballoc-test.c81
-rw-r--r--fs/ext4/mballoc.c132
-rw-r--r--fs/ext4/mballoc.h30
-rw-r--r--fs/ext4/page-io.c10
-rw-r--r--fs/ext4/super.c37
-rw-r--r--fs/ext4/sysfs.c10
-rw-r--r--fs/jbd2/checkpoint.c15
18 files changed, 449 insertions, 114 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 72206a292676..3baee4e7c1cf 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -14,7 +14,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
-ext4-inode-test-objs += inode-test.o
-obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-inode-test.o
+ext4-test-objs += inode-test.o mballoc-test.o \
+ extents-test.o
+obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-test.o
ext4-$(CONFIG_FS_VERITY) += verity.o
ext4-$(CONFIG_FS_ENCRYPTION) += crypto.o
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index cf0a0970c095..f41f320f4437 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -163,10 +163,17 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
*/
if (handle) {
+ /*
+ * Since the inode is new it is ok to pass the
+ * XATTR_CREATE flag. This is necessary to match the
+ * remaining journal credits check in the set_handle
+ * function with the credits allocated for the new
+ * inode.
+ */
res = ext4_xattr_set_handle(handle, inode,
EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
- ctx, len, 0);
+ ctx, len, XATTR_CREATE);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(inode,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 293f698b7042..7617e2d454ea 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1570,6 +1570,7 @@ struct ext4_sb_info {
struct proc_dir_entry *s_proc;
struct kobject s_kobj;
struct completion s_kobj_unregister;
+ struct mutex s_error_notify_mutex; /* protects sysfs_notify vs kobject_del */
struct super_block *s_sb;
struct buffer_head *s_mmp_bh;
@@ -3944,6 +3945,11 @@ static inline bool ext4_inode_can_atomic_write(struct inode *inode)
extern int ext4_block_write_begin(handle_t *handle, struct folio *folio,
loff_t pos, unsigned len,
get_block_t *get_block);
+
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+#define EXPORT_SYMBOL_FOR_EXT4_TEST(sym) \
+ EXPORT_SYMBOL_FOR_MODULES(sym, "ext4-test")
+#endif
#endif /* __KERNEL__ */
#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index c484125d963f..ebaf7cc42430 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -264,5 +264,17 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
0xffff);
}
+extern int __ext4_ext_dirty(const char *where, unsigned int line,
+ handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path);
+extern int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex);
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+extern int ext4_ext_space_root_idx_test(struct inode *inode, int check);
+extern struct ext4_ext_path *ext4_split_convert_extents_test(
+ handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path,
+ int flags, unsigned int *allocated);
+#endif
#endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c
index 7c4690eb7dad..5496b2c8e2cd 100644
--- a/fs/ext4/extents-test.c
+++ b/fs/ext4/extents-test.c
@@ -142,8 +142,10 @@ static struct file_system_type ext_fs_type = {
static void extents_kunit_exit(struct kunit *test)
{
- struct ext4_sb_info *sbi = k_ctx.k_ei->vfs_inode.i_sb->s_fs_info;
+ struct super_block *sb = k_ctx.k_ei->vfs_inode.i_sb;
+ struct ext4_sb_info *sbi = sb->s_fs_info;
+ ext4_es_unregister_shrinker(sbi);
kfree(sbi);
kfree(k_ctx.k_ei);
kfree(k_ctx.k_data);
@@ -280,8 +282,8 @@ static int extents_kunit_init(struct kunit *test)
eh->eh_depth = 0;
eh->eh_entries = cpu_to_le16(1);
eh->eh_magic = EXT4_EXT_MAGIC;
- eh->eh_max =
- cpu_to_le16(ext4_ext_space_root_idx(&k_ctx.k_ei->vfs_inode, 0));
+ eh->eh_max = cpu_to_le16(ext4_ext_space_root_idx_test(
+ &k_ctx.k_ei->vfs_inode, 0));
eh->eh_generation = 0;
/*
@@ -384,8 +386,8 @@ static void test_split_convert(struct kunit *test)
switch (param->type) {
case TEST_SPLIT_CONVERT:
- path = ext4_split_convert_extents(NULL, inode, &map, path,
- param->split_flags, NULL);
+ path = ext4_split_convert_extents_test(NULL, inode, &map,
+ path, param->split_flags, NULL);
break;
case TEST_CREATE_BLOCKS:
ext4_map_create_blocks_helper(test, inode, &map, param->split_flags);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ae3804f36535..8cce1479be6d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -184,9 +184,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
* - ENOMEM
* - EIO
*/
-static int __ext4_ext_dirty(const char *where, unsigned int line,
- handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
+int __ext4_ext_dirty(const char *where, unsigned int line,
+ handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path)
{
int err;
@@ -1736,6 +1736,13 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
err = ext4_ext_get_access(handle, inode, path + k);
if (err)
return err;
+ if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "path[%d].p_idx %p > EXT_LAST_INDEX %p",
+ k, path[k].p_idx,
+ EXT_LAST_INDEX(path[k].p_hdr));
+ return -EFSCORRUPTED;
+ }
path[k].p_idx->ei_block = border;
err = ext4_ext_dirty(handle, inode, path + k);
if (err)
@@ -1748,6 +1755,14 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
err = ext4_ext_get_access(handle, inode, path + k);
if (err)
goto clean;
+ if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "path[%d].p_idx %p > EXT_LAST_INDEX %p",
+ k, path[k].p_idx,
+ EXT_LAST_INDEX(path[k].p_hdr));
+ err = -EFSCORRUPTED;
+ goto clean;
+ }
path[k].p_idx->ei_block = border;
err = ext4_ext_dirty(handle, inode, path + k);
if (err)
@@ -3144,7 +3159,7 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
}
/* FIXME!! we need to try to merge to left or right after zero-out */
-static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
ext4_fsblk_t ee_pblock;
unsigned int ee_len;
@@ -3239,6 +3254,9 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
insert_err = PTR_ERR(path);
err = 0;
+ if (insert_err != -ENOSPC && insert_err != -EDQUOT &&
+ insert_err != -ENOMEM)
+ goto out_path;
/*
* Get a new path to try to zeroout or fix the extent length.
@@ -3255,13 +3273,20 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
goto out_path;
}
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ if (!ex) {
+ EXT4_ERROR_INODE(inode,
+ "bad extent address lblock: %lu, depth: %d pblock %llu",
+ (unsigned long)ee_block, depth, path[depth].p_block);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
-
fix_extent_len:
ex->ee_len = orig_ex.ee_len;
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
@@ -3363,7 +3388,7 @@ static int ext4_split_extent_zeroout(handle_t *handle, struct inode *inode,
ext4_ext_mark_initialized(ex);
- ext4_ext_dirty(handle, inode, path + depth);
+ err = ext4_ext_dirty(handle, inode, path + depth);
if (err)
return err;
@@ -4457,9 +4482,13 @@ got_allocated_blocks:
path = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (IS_ERR(path)) {
err = PTR_ERR(path);
- if (allocated_clusters) {
+ /*
+ * Gracefully handle out of space conditions. If the filesystem
+ * is inconsistent, we'll just leak allocated blocks to avoid
+ * causing even more damage.
+ */
+ if (allocated_clusters && (err == -EDQUOT || err == -ENOSPC)) {
int fb_flags = 0;
-
/*
* free data blocks we just allocated.
* not a good idea to call discard here directly,
@@ -6238,6 +6267,33 @@ out:
return 0;
}
-#ifdef CONFIG_EXT4_KUNIT_TESTS
-#include "extents-test.c"
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+int ext4_ext_space_root_idx_test(struct inode *inode, int check)
+{
+ return ext4_ext_space_root_idx(inode, check);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_space_root_idx_test);
+
+struct ext4_ext_path *ext4_split_convert_extents_test(handle_t *handle,
+ struct inode *inode, struct ext4_map_blocks *map,
+ struct ext4_ext_path *path, int flags,
+ unsigned int *allocated)
+{
+ return ext4_split_convert_extents(handle, inode, map, path,
+ flags, allocated);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_split_convert_extents_test);
+
+EXPORT_SYMBOL_FOR_EXT4_TEST(__ext4_ext_dirty);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_zeroout);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_register_shrinker);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_unregister_shrinker);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_create_blocks);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_init_tree);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_lookup_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_insert_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_insert_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_find_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_issue_zeroout);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_query_blocks);
#endif
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index f575751f1cae..2f0057e04934 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -975,13 +975,13 @@ static int ext4_fc_flush_data(journal_t *journal)
int ret = 0;
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
- ret = jbd2_submit_inode_data(journal, ei->jinode);
+ ret = jbd2_submit_inode_data(journal, READ_ONCE(ei->jinode));
if (ret)
return ret;
}
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
- ret = jbd2_wait_inode_data(journal, ei->jinode);
+ ret = jbd2_wait_inode_data(journal, READ_ONCE(ei->jinode));
if (ret)
return ret;
}
@@ -1613,19 +1613,21 @@ static int ext4_fc_replay_inode(struct super_block *sb,
/* Immediately update the inode on disk. */
ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
if (ret)
- goto out;
+ goto out_brelse;
ret = sync_dirty_buffer(iloc.bh);
if (ret)
- goto out;
+ goto out_brelse;
ret = ext4_mark_inode_used(sb, ino);
if (ret)
- goto out;
+ goto out_brelse;
/* Given that we just wrote the inode on disk, this SHOULD succeed. */
inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
ext4_debug("Inode not found.");
- return -EFSCORRUPTED;
+ inode = NULL;
+ ret = -EFSCORRUPTED;
+ goto out_brelse;
}
/*
@@ -1642,13 +1644,14 @@ static int ext4_fc_replay_inode(struct super_block *sb,
ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
sync_dirty_buffer(iloc.bh);
+out_brelse:
brelse(iloc.bh);
out:
iput(inode);
if (!ret)
blkdev_issue_flush(sb->s_bdev);
- return 0;
+ return ret;
}
/*
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index e476c6de3074..bd8f230fa507 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -83,11 +83,23 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
int datasync, bool *needs_barrier)
{
struct inode *inode = file->f_inode;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0,
+ };
int ret;
ret = generic_buffers_fsync_noflush(file, start, end, datasync);
- if (!ret)
- ret = ext4_sync_parent(inode);
+ if (ret)
+ return ret;
+
+ /* Force writeout of inode table buffer to disk */
+ ret = ext4_write_inode(inode, &wbc);
+ if (ret)
+ return ret;
+
+ ret = ext4_sync_parent(inode);
+
if (test_opt(inode->i_sb, BARRIER))
*needs_barrier = true;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b20a1bf866ab..b1bc1950c9f0 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -686,6 +686,12 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
if (unlikely(!gdp))
return 0;
+ /* Inode was never used in this filesystem? */
+ if (ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT) ||
+ ino >= EXT4_INODES_PER_GROUP(sb) - ext4_itable_unused_count(sb, gdp)))
+ return 0;
+
bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) +
(ino / inodes_per_block));
if (!bh || !buffer_uptodate(bh))
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 1f6bc05593df..408677fa8196 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -522,7 +522,15 @@ static int ext4_read_inline_folio(struct inode *inode, struct folio *folio)
goto out;
len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
- BUG_ON(len > PAGE_SIZE);
+
+ if (len > PAGE_SIZE) {
+ ext4_error_inode(inode, __func__, __LINE__, 0,
+ "inline size %zu exceeds PAGE_SIZE", len);
+ ret = -EFSCORRUPTED;
+ brelse(iloc.bh);
+ goto out;
+ }
+
kaddr = kmap_local_folio(folio, 0);
ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
kaddr = folio_zero_tail(folio, len, kaddr + len);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 396dc3a5d16b..1123d995494b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -128,6 +128,8 @@ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
+ struct jbd2_inode *jinode = READ_ONCE(EXT4_I(inode)->jinode);
+
trace_ext4_begin_ordered_truncate(inode, new_size);
/*
* If jinode is zero, then we never opened the file for
@@ -135,10 +137,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
* jbd2_journal_begin_ordered_truncate() since there's no
* outstanding writes we need to flush.
*/
- if (!EXT4_I(inode)->jinode)
+ if (!jinode)
return 0;
return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
- EXT4_I(inode)->jinode,
+ jinode,
new_size);
}
@@ -184,6 +186,14 @@ void ext4_evict_inode(struct inode *inode)
if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
ext4_evict_ea_inode(inode);
if (inode->i_nlink) {
+ /*
+ * If there's dirty page will lead to data loss, user
+ * could see stale data.
+ */
+ if (unlikely(!ext4_emergency_state(inode->i_sb) &&
+ mapping_tagged(&inode->i_data, PAGECACHE_TAG_DIRTY)))
+ ext4_warning_inode(inode, "data will be lost");
+
truncate_inode_pages_final(&inode->i_data);
goto no_delete;
@@ -4451,8 +4461,13 @@ int ext4_inode_attach_jinode(struct inode *inode)
spin_unlock(&inode->i_lock);
return -ENOMEM;
}
- ei->jinode = jinode;
- jbd2_journal_init_jbd_inode(ei->jinode, inode);
+ jbd2_journal_init_jbd_inode(jinode, inode);
+ /*
+ * Publish ->jinode only after it is fully initialized so that
+ * readers never observe a partially initialized jbd2_inode.
+ */
+ smp_wmb();
+ WRITE_ONCE(ei->jinode, jinode);
jinode = NULL;
}
spin_unlock(&inode->i_lock);
@@ -5401,18 +5416,36 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
inode->i_op = &ext4_encrypted_symlink_inode_operations;
} else if (ext4_inode_is_fast_symlink(inode)) {
inode->i_op = &ext4_fast_symlink_inode_operations;
- if (inode->i_size == 0 ||
- inode->i_size >= sizeof(ei->i_data) ||
- strnlen((char *)ei->i_data, inode->i_size + 1) !=
- inode->i_size) {
- ext4_error_inode(inode, function, line, 0,
- "invalid fast symlink length %llu",
- (unsigned long long)inode->i_size);
- ret = -EFSCORRUPTED;
- goto bad_inode;
+
+ /*
+ * Orphan cleanup can see inodes with i_size == 0
+ * and i_data uninitialized. Skip size checks in
+ * that case. This is safe because the first thing
+ * ext4_evict_inode() does for fast symlinks is
+ * clearing of i_data and i_size.
+ */
+ if ((EXT4_SB(sb)->s_mount_state & EXT4_ORPHAN_FS)) {
+ if (inode->i_nlink != 0) {
+ ext4_error_inode(inode, function, line, 0,
+ "invalid orphan symlink nlink %d",
+ inode->i_nlink);
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
+ } else {
+ if (inode->i_size == 0 ||
+ inode->i_size >= sizeof(ei->i_data) ||
+ strnlen((char *)ei->i_data, inode->i_size + 1) !=
+ inode->i_size) {
+ ext4_error_inode(inode, function, line, 0,
+ "invalid fast symlink length %llu",
+ (unsigned long long)inode->i_size);
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
+ inode_set_cached_link(inode, (char *)ei->i_data,
+ inode->i_size);
}
- inode_set_cached_link(inode, (char *)ei->i_data,
- inode->i_size);
} else {
inode->i_op = &ext4_symlink_inode_operations;
}
@@ -5849,6 +5882,18 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (attr->ia_size == inode->i_size)
inc_ivers = false;
+ /*
+ * If file has inline data but new size exceeds inline capacity,
+ * convert to extent-based storage first to prevent inconsistent
+ * state (inline flag set but size exceeds inline capacity).
+ */
+ if (ext4_has_inline_data(inode) &&
+ attr->ia_size > EXT4_I(inode)->i_inline_size) {
+ error = ext4_convert_inline_data(inode);
+ if (error)
+ goto err_out;
+ }
+
if (shrink) {
if (ext4_should_order_data(inode)) {
error = ext4_begin_ordered_truncate(inode,
diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index 9fbdf6a09489..6f5bfbb0e8a4 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -8,6 +8,7 @@
#include <linux/random.h>
#include "ext4.h"
+#include "mballoc.h"
struct mbt_grp_ctx {
struct buffer_head bitmap_bh;
@@ -336,7 +337,7 @@ ext4_mb_mark_context_stub(handle_t *handle, struct super_block *sb, bool state,
if (state)
mb_set_bits(bitmap_bh->b_data, blkoff, len);
else
- mb_clear_bits(bitmap_bh->b_data, blkoff, len);
+ mb_clear_bits_test(bitmap_bh->b_data, blkoff, len);
return 0;
}
@@ -413,14 +414,14 @@ static void test_new_blocks_simple(struct kunit *test)
/* get block at goal */
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test, ar.goal, found,
"failed to alloc block at goal, expected %llu found %llu",
ar.goal, found);
/* get block after goal in goal group */
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test, ar.goal + EXT4_C2B(sbi, 1), found,
"failed to alloc block after goal in goal group, expected %llu found %llu",
ar.goal + 1, found);
@@ -428,7 +429,7 @@ static void test_new_blocks_simple(struct kunit *test)
/* get block after goal group */
mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb));
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test,
ext4_group_first_block_no(sb, goal_group + 1), found,
"failed to alloc block after goal group, expected %llu found %llu",
@@ -438,7 +439,7 @@ static void test_new_blocks_simple(struct kunit *test)
for (i = goal_group; i < ext4_get_groups_count(sb); i++)
mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb));
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test,
ext4_group_first_block_no(sb, 0) + EXT4_C2B(sbi, 1), found,
"failed to alloc block before goal group, expected %llu found %llu",
@@ -448,7 +449,7 @@ static void test_new_blocks_simple(struct kunit *test)
for (i = 0; i < ext4_get_groups_count(sb); i++)
mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb));
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_NE_MSG(test, err, 0,
"unexpectedly get block when no block is available");
}
@@ -492,16 +493,16 @@ validate_free_blocks_simple(struct kunit *test, struct super_block *sb,
continue;
bitmap = mbt_ctx_bitmap(sb, i);
- bit = mb_find_next_zero_bit(bitmap, max, 0);
+ bit = mb_find_next_zero_bit_test(bitmap, max, 0);
KUNIT_ASSERT_EQ_MSG(test, bit, max,
"free block on unexpected group %d", i);
}
bitmap = mbt_ctx_bitmap(sb, goal_group);
- bit = mb_find_next_zero_bit(bitmap, max, 0);
+ bit = mb_find_next_zero_bit_test(bitmap, max, 0);
KUNIT_ASSERT_EQ(test, bit, start);
- bit = mb_find_next_bit(bitmap, max, bit + 1);
+ bit = mb_find_next_bit_test(bitmap, max, bit + 1);
KUNIT_ASSERT_EQ(test, bit, start + len);
}
@@ -524,7 +525,7 @@ test_free_blocks_simple_range(struct kunit *test, ext4_group_t goal_group,
block = ext4_group_first_block_no(sb, goal_group) +
EXT4_C2B(sbi, start);
- ext4_free_blocks_simple(inode, block, len);
+ ext4_free_blocks_simple_test(inode, block, len);
validate_free_blocks_simple(test, sb, goal_group, start, len);
mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb));
}
@@ -566,15 +567,15 @@ test_mark_diskspace_used_range(struct kunit *test,
bitmap = mbt_ctx_bitmap(sb, TEST_GOAL_GROUP);
memset(bitmap, 0, sb->s_blocksize);
- ret = ext4_mb_mark_diskspace_used(ac, NULL);
+ ret = ext4_mb_mark_diskspace_used_test(ac, NULL);
KUNIT_ASSERT_EQ(test, ret, 0);
max = EXT4_CLUSTERS_PER_GROUP(sb);
- i = mb_find_next_bit(bitmap, max, 0);
+ i = mb_find_next_bit_test(bitmap, max, 0);
KUNIT_ASSERT_EQ(test, i, start);
- i = mb_find_next_zero_bit(bitmap, max, i + 1);
+ i = mb_find_next_zero_bit_test(bitmap, max, i + 1);
KUNIT_ASSERT_EQ(test, i, start + len);
- i = mb_find_next_bit(bitmap, max, i + 1);
+ i = mb_find_next_bit_test(bitmap, max, i + 1);
KUNIT_ASSERT_EQ(test, max, i);
}
@@ -617,54 +618,54 @@ static void mbt_generate_buddy(struct super_block *sb, void *buddy,
max = EXT4_CLUSTERS_PER_GROUP(sb);
bb_h = buddy + sbi->s_mb_offsets[1];
- off = mb_find_next_zero_bit(bb, max, 0);
+ off = mb_find_next_zero_bit_test(bb, max, 0);
grp->bb_first_free = off;
while (off < max) {
grp->bb_counters[0]++;
grp->bb_free++;
- if (!(off & 1) && !mb_test_bit(off + 1, bb)) {
+ if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) {
grp->bb_free++;
grp->bb_counters[0]--;
- mb_clear_bit(off >> 1, bb_h);
+ mb_clear_bit_test(off >> 1, bb_h);
grp->bb_counters[1]++;
grp->bb_largest_free_order = 1;
off++;
}
- off = mb_find_next_zero_bit(bb, max, off + 1);
+ off = mb_find_next_zero_bit_test(bb, max, off + 1);
}
for (order = 1; order < MB_NUM_ORDERS(sb) - 1; order++) {
bb = buddy + sbi->s_mb_offsets[order];
bb_h = buddy + sbi->s_mb_offsets[order + 1];
max = max >> 1;
- off = mb_find_next_zero_bit(bb, max, 0);
+ off = mb_find_next_zero_bit_test(bb, max, 0);
while (off < max) {
- if (!(off & 1) && !mb_test_bit(off + 1, bb)) {
+ if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) {
mb_set_bits(bb, off, 2);
grp->bb_counters[order] -= 2;
- mb_clear_bit(off >> 1, bb_h);
+ mb_clear_bit_test(off >> 1, bb_h);
grp->bb_counters[order + 1]++;
grp->bb_largest_free_order = order + 1;
off++;
}
- off = mb_find_next_zero_bit(bb, max, off + 1);
+ off = mb_find_next_zero_bit_test(bb, max, off + 1);
}
}
max = EXT4_CLUSTERS_PER_GROUP(sb);
- off = mb_find_next_zero_bit(bitmap, max, 0);
+ off = mb_find_next_zero_bit_test(bitmap, max, 0);
while (off < max) {
grp->bb_fragments++;
- off = mb_find_next_bit(bitmap, max, off + 1);
+ off = mb_find_next_bit_test(bitmap, max, off + 1);
if (off + 1 >= max)
break;
- off = mb_find_next_zero_bit(bitmap, max, off + 1);
+ off = mb_find_next_zero_bit_test(bitmap, max, off + 1);
}
}
@@ -706,7 +707,7 @@ do_test_generate_buddy(struct kunit *test, struct super_block *sb, void *bitmap,
/* needed by validation in ext4_mb_generate_buddy */
ext4_grp->bb_free = mbt_grp->bb_free;
memset(ext4_buddy, 0xff, sb->s_blocksize);
- ext4_mb_generate_buddy(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP,
+ ext4_mb_generate_buddy_test(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP,
ext4_grp);
KUNIT_ASSERT_EQ(test, memcmp(mbt_buddy, ext4_buddy, sb->s_blocksize),
@@ -760,7 +761,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b,
ex.fe_group = TEST_GOAL_GROUP;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_mark_used(e4b, &ex);
+ mb_mark_used_test(e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
mb_set_bits(bitmap, start, len);
@@ -769,7 +770,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b,
memset(buddy, 0xff, sb->s_blocksize);
for (i = 0; i < MB_NUM_ORDERS(sb); i++)
grp->bb_counters[i] = 0;
- ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp);
+ ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp);
KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize),
0);
@@ -798,7 +799,7 @@ static void test_mb_mark_used(struct kunit *test)
bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp);
- ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb);
@@ -809,7 +810,7 @@ static void test_mb_mark_used(struct kunit *test)
test_mb_mark_used_range(test, &e4b, ranges[i].start,
ranges[i].len, bitmap, buddy, grp);
- ext4_mb_unload_buddy(&e4b);
+ ext4_mb_unload_buddy_test(&e4b);
}
static void
@@ -825,16 +826,16 @@ test_mb_free_blocks_range(struct kunit *test, struct ext4_buddy *e4b,
return;
ext4_lock_group(sb, e4b->bd_group);
- mb_free_blocks(NULL, e4b, start, len);
+ mb_free_blocks_test(NULL, e4b, start, len);
ext4_unlock_group(sb, e4b->bd_group);
- mb_clear_bits(bitmap, start, len);
+ mb_clear_bits_test(bitmap, start, len);
/* bypass bb_free validatoin in ext4_mb_generate_buddy */
grp->bb_free += len;
memset(buddy, 0xff, sb->s_blocksize);
for (i = 0; i < MB_NUM_ORDERS(sb); i++)
grp->bb_counters[i] = 0;
- ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp);
+ ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp);
KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize),
0);
@@ -865,7 +866,7 @@ static void test_mb_free_blocks(struct kunit *test)
bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp);
- ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
ex.fe_start = 0;
@@ -873,7 +874,7 @@ static void test_mb_free_blocks(struct kunit *test)
ex.fe_group = TEST_GOAL_GROUP;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_mark_used(&e4b, &ex);
+ mb_mark_used_test(&e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
grp->bb_free = 0;
@@ -886,7 +887,7 @@ static void test_mb_free_blocks(struct kunit *test)
test_mb_free_blocks_range(test, &e4b, ranges[i].start,
ranges[i].len, bitmap, buddy, grp);
- ext4_mb_unload_buddy(&e4b);
+ ext4_mb_unload_buddy_test(&e4b);
}
#define COUNT_FOR_ESTIMATE 100000
@@ -904,7 +905,7 @@ static void test_mb_mark_used_cost(struct kunit *test)
if (sb->s_blocksize > PAGE_SIZE)
kunit_skip(test, "blocksize exceeds pagesize");
- ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
ex.fe_group = TEST_GOAL_GROUP;
@@ -918,7 +919,7 @@ static void test_mb_mark_used_cost(struct kunit *test)
ex.fe_start = ranges[i].start;
ex.fe_len = ranges[i].len;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_mark_used(&e4b, &ex);
+ mb_mark_used_test(&e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
}
end = jiffies;
@@ -929,14 +930,14 @@ static void test_mb_mark_used_cost(struct kunit *test)
continue;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_free_blocks(NULL, &e4b, ranges[i].start,
+ mb_free_blocks_test(NULL, &e4b, ranges[i].start,
ranges[i].len);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
}
}
kunit_info(test, "costed jiffies %lu\n", all);
- ext4_mb_unload_buddy(&e4b);
+ ext4_mb_unload_buddy_test(&e4b);
}
static const struct mbt_ext4_block_layout mbt_test_layouts[] = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 20e9fdaf4301..bb58eafb87bc 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1199,6 +1199,8 @@ static int ext4_mb_scan_groups(struct ext4_allocation_context *ac)
/* searching for the right group start from the goal value specified */
start = ac->ac_g_ex.fe_group;
+ if (start >= ngroups)
+ start = 0;
ac->ac_prefetch_grp = start;
ac->ac_prefetch_nr = 0;
@@ -2443,8 +2445,12 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
return 0;
err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
- if (err)
+ if (err) {
+ if (EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info) &&
+ !(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
+ return 0;
return err;
+ }
ext4_lock_group(ac->ac_sb, group);
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
@@ -3580,9 +3586,7 @@ err_freebuddy:
rcu_read_unlock();
iput(sbi->s_buddy_cache);
err_freesgi:
- rcu_read_lock();
- kvfree(rcu_dereference(sbi->s_group_info));
- rcu_read_unlock();
+ kvfree(rcu_access_pointer(sbi->s_group_info));
return -ENOMEM;
}
@@ -3889,15 +3893,14 @@ void ext4_mb_release(struct super_block *sb)
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
int count;
- if (test_opt(sb, DISCARD)) {
- /*
- * wait the discard work to drain all of ext4_free_data
- */
- flush_work(&sbi->s_discard_work);
- WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
- }
+ /*
+ * wait the discard work to drain all of ext4_free_data
+ */
+ flush_work(&sbi->s_discard_work);
+ WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
- if (sbi->s_group_info) {
+ group_info = rcu_access_pointer(sbi->s_group_info);
+ if (group_info) {
for (i = 0; i < ngroups; i++) {
cond_resched();
grinfo = ext4_get_group_info(sb, i);
@@ -3915,12 +3918,9 @@ void ext4_mb_release(struct super_block *sb)
num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >>
EXT4_DESC_PER_BLOCK_BITS(sb);
- rcu_read_lock();
- group_info = rcu_dereference(sbi->s_group_info);
for (i = 0; i < num_meta_group_infos; i++)
kfree(group_info[i]);
kvfree(group_info);
- rcu_read_unlock();
}
ext4_mb_avg_fragment_size_destroy(sbi);
ext4_mb_largest_free_orders_destroy(sbi);
@@ -4084,7 +4084,7 @@ void ext4_exit_mballoc(void)
#define EXT4_MB_BITMAP_MARKED_CHECK 0x0001
#define EXT4_MB_SYNC_UPDATE 0x0002
-static int
+int
ext4_mb_mark_context(handle_t *handle, struct super_block *sb, bool state,
ext4_group_t group, ext4_grpblk_t blkoff,
ext4_grpblk_t len, int flags, ext4_grpblk_t *ret_changed)
@@ -7188,6 +7188,102 @@ out_unload:
return error;
}
-#ifdef CONFIG_EXT4_KUNIT_TESTS
-#include "mballoc-test.c"
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+void mb_clear_bits_test(void *bm, int cur, int len)
+{
+ mb_clear_bits(bm, cur, len);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bits_test);
+
+ext4_fsblk_t
+ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar,
+ int *errp)
+{
+ return ext4_mb_new_blocks_simple(ar, errp);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_new_blocks_simple_test);
+
+int mb_find_next_zero_bit_test(void *addr, int max, int start)
+{
+ return mb_find_next_zero_bit(addr, max, start);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_zero_bit_test);
+
+int mb_find_next_bit_test(void *addr, int max, int start)
+{
+ return mb_find_next_bit(addr, max, start);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_bit_test);
+
+void mb_clear_bit_test(int bit, void *addr)
+{
+ mb_clear_bit(bit, addr);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bit_test);
+
+int mb_test_bit_test(int bit, void *addr)
+{
+ return mb_test_bit(bit, addr);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_test_bit_test);
+
+int ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac,
+ handle_t *handle)
+{
+ return ext4_mb_mark_diskspace_used(ac, handle);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_diskspace_used_test);
+
+int mb_mark_used_test(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
+{
+ return mb_mark_used(e4b, ex);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_mark_used_test);
+
+void ext4_mb_generate_buddy_test(struct super_block *sb, void *buddy,
+ void *bitmap, ext4_group_t group,
+ struct ext4_group_info *grp)
+{
+ ext4_mb_generate_buddy(sb, buddy, bitmap, group, grp);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_generate_buddy_test);
+
+int ext4_mb_load_buddy_test(struct super_block *sb, ext4_group_t group,
+ struct ext4_buddy *e4b)
+{
+ return ext4_mb_load_buddy(sb, group, e4b);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_load_buddy_test);
+
+void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b)
+{
+ ext4_mb_unload_buddy(e4b);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_unload_buddy_test);
+
+void mb_free_blocks_test(struct inode *inode, struct ext4_buddy *e4b,
+ int first, int count)
+{
+ mb_free_blocks(inode, e4b, first, count);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_free_blocks_test);
+
+void ext4_free_blocks_simple_test(struct inode *inode, ext4_fsblk_t block,
+ unsigned long count)
+{
+ return ext4_free_blocks_simple(inode, block, count);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_blocks_simple_test);
+
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_wait_block_bitmap);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_init);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_desc);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_count_free_clusters);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_info);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_group_clusters_set);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_release);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_read_block_bitmap_nowait);
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_set_bits);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_fc_init_inode);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_context);
#endif
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 15a049f05d04..39333ce72cbd 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -270,4 +270,34 @@ ext4_mballoc_query_range(
ext4_mballoc_query_range_fn formatter,
void *priv);
+extern int ext4_mb_mark_context(handle_t *handle,
+ struct super_block *sb, bool state,
+ ext4_group_t group, ext4_grpblk_t blkoff,
+ ext4_grpblk_t len, int flags,
+ ext4_grpblk_t *ret_changed);
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+extern void mb_clear_bits_test(void *bm, int cur, int len);
+extern ext4_fsblk_t
+ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar,
+ int *errp);
+extern int mb_find_next_zero_bit_test(void *addr, int max, int start);
+extern int mb_find_next_bit_test(void *addr, int max, int start);
+extern void mb_clear_bit_test(int bit, void *addr);
+extern int mb_test_bit_test(int bit, void *addr);
+extern int
+ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac,
+ handle_t *handle);
+extern int mb_mark_used_test(struct ext4_buddy *e4b,
+ struct ext4_free_extent *ex);
+extern void ext4_mb_generate_buddy_test(struct super_block *sb,
+ void *buddy, void *bitmap, ext4_group_t group,
+ struct ext4_group_info *grp);
+extern int ext4_mb_load_buddy_test(struct super_block *sb,
+ ext4_group_t group, struct ext4_buddy *e4b);
+extern void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b);
+extern void mb_free_blocks_test(struct inode *inode,
+ struct ext4_buddy *e4b, int first, int count);
+extern void ext4_free_blocks_simple_test(struct inode *inode,
+ ext4_fsblk_t block, unsigned long count);
+#endif
#endif
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index a8c95eee91b7..39fe50b3c662 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -524,9 +524,15 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio,
nr_to_submit++;
} while ((bh = bh->b_this_page) != head);
- /* Nothing to submit? Just unlock the folio... */
- if (!nr_to_submit)
+ if (!nr_to_submit) {
+ /*
+ * We have nothing to submit. Just cycle the folio through
+ * writeback state to properly update xarray tags.
+ */
+ __folio_start_writeback(folio, keep_towrite);
+ folio_end_writeback(folio);
return 0;
+ }
bh = head = folio_buffers(folio);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 43f680c750ae..a34efb44e73d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1254,12 +1254,10 @@ static void ext4_group_desc_free(struct ext4_sb_info *sbi)
struct buffer_head **group_desc;
int i;
- rcu_read_lock();
- group_desc = rcu_dereference(sbi->s_group_desc);
+ group_desc = rcu_access_pointer(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(group_desc[i]);
kvfree(group_desc);
- rcu_read_unlock();
}
static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
@@ -1267,14 +1265,12 @@ static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
struct flex_groups **flex_groups;
int i;
- rcu_read_lock();
- flex_groups = rcu_dereference(sbi->s_flex_groups);
+ flex_groups = rcu_access_pointer(sbi->s_flex_groups);
if (flex_groups) {
for (i = 0; i < sbi->s_flex_groups_allocated; i++)
kvfree(flex_groups[i]);
kvfree(flex_groups);
}
- rcu_read_unlock();
}
static void ext4_put_super(struct super_block *sb)
@@ -1527,6 +1523,27 @@ void ext4_clear_inode(struct inode *inode)
invalidate_inode_buffers(inode);
clear_inode(inode);
ext4_discard_preallocations(inode);
+ /*
+ * We must remove the inode from the hash before ext4_free_inode()
+ * clears the bit in inode bitmap as otherwise another process reusing
+ * the inode will block in insert_inode_hash() waiting for inode
+ * eviction to complete while holding transaction handle open, but
+ * ext4_evict_inode() still running for that inode could block waiting
+ * for transaction commit if the inode is marked as IS_SYNC => deadlock.
+ *
+ * Removing the inode from the hash here is safe. There are two cases
+ * to consider:
+ * 1) The inode still has references to it (i_nlink > 0). In that case
+ * we are keeping the inode and once we remove the inode from the hash,
+ * iget() can create the new inode structure for the same inode number
+ * and we are fine with that as all IO on behalf of the inode is
+ * finished.
+ * 2) We are deleting the inode (i_nlink == 0). In that case inode
+ * number cannot be reused until ext4_free_inode() clears the bit in
+ * the inode bitmap, at which point all IO is done and reuse is fine
+ * again.
+ */
+ remove_inode_hash(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
dquot_drop(inode);
if (EXT4_I(inode)->jinode) {
@@ -3633,6 +3650,13 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly)
"extents feature\n");
return 0;
}
+ if (ext4_has_feature_bigalloc(sb) &&
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
+ ext4_msg(sb, KERN_WARNING,
+ "bad geometry: bigalloc file system with non-zero "
+ "first_data_block\n");
+ return 0;
+ }
#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
if (!readonly && (ext4_has_feature_quota(sb) ||
@@ -5403,6 +5427,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
spin_lock_init(&sbi->s_error_lock);
+ mutex_init(&sbi->s_error_notify_mutex);
INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index b87d7bdab06a..923b375e017f 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -597,7 +597,10 @@ static const struct kobj_type ext4_feat_ktype = {
void ext4_notify_error_sysfs(struct ext4_sb_info *sbi)
{
- sysfs_notify(&sbi->s_kobj, NULL, "errors_count");
+ mutex_lock(&sbi->s_error_notify_mutex);
+ if (sbi->s_kobj.state_in_sysfs)
+ sysfs_notify(&sbi->s_kobj, NULL, "errors_count");
+ mutex_unlock(&sbi->s_error_notify_mutex);
}
static struct kobject *ext4_root;
@@ -610,8 +613,10 @@ int ext4_register_sysfs(struct super_block *sb)
int err;
init_completion(&sbi->s_kobj_unregister);
+ mutex_lock(&sbi->s_error_notify_mutex);
err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root,
"%s", sb->s_id);
+ mutex_unlock(&sbi->s_error_notify_mutex);
if (err) {
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
@@ -644,7 +649,10 @@ void ext4_unregister_sysfs(struct super_block *sb)
if (sbi->s_proc)
remove_proc_subtree(sb->s_id, ext4_proc_root);
+
+ mutex_lock(&sbi->s_error_notify_mutex);
kobject_del(&sbi->s_kobj);
+ mutex_unlock(&sbi->s_error_notify_mutex);
}
int __init ext4_init_sysfs(void)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index de89c5bef607..1508e2f54462 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -267,7 +267,15 @@ restart:
*/
BUFFER_TRACE(bh, "queue");
get_bh(bh);
- J_ASSERT_BH(bh, !buffer_jwrite(bh));
+ if (WARN_ON_ONCE(buffer_jwrite(bh))) {
+ put_bh(bh); /* drop the ref we just took */
+ spin_unlock(&journal->j_list_lock);
+ /* Clean up any previously batched buffers */
+ if (batch_count)
+ __flush_batch(journal, &batch_count);
+ jbd2_journal_abort(journal, -EFSCORRUPTED);
+ return -EFSCORRUPTED;
+ }
journal->j_chkpt_bhs[batch_count++] = bh;
transaction->t_chp_stats.cs_written++;
transaction->t_checkpoint_list = jh->b_cpnext;
@@ -325,7 +333,10 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
return 1;
- J_ASSERT(blocknr != 0);
+ if (WARN_ON_ONCE(blocknr == 0)) {
+ jbd2_journal_abort(journal, -EFSCORRUPTED);
+ return -EFSCORRUPTED;
+ }
/*
* We need to make sure that any blocks that were recently written out