From 15a119e09344a346384ec05c781c126a29b18235 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Wed, 23 Sep 2020 01:12:31 +0800 Subject: jbd2: fix the comment of struct jbd2_journal_handle the struct name was modified long ago, but the comment still uses struct handle_s. Signed-off-by: Hui Su Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200922171231.GA53120@rlk Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 08f904943ab2..a1ef05412acf 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -452,8 +452,8 @@ struct jbd2_inode { struct jbd2_revoke_table_s; /** - * struct handle_s - The handle_s type is the concrete type associated with - * handle_t. + * struct jbd2_journal_handle - The jbd2_journal_handle type is the concrete + * type associated with handle_t. * @h_transaction: Which compound transaction is this update a part of? * @h_journal: Which journal handle belongs to - used iff h_reserved set. * @h_rsv_handle: Handle reserved for finishing the logical operation. -- cgit v1.2.3 From aa3c0c61f62d682259e3e66cdc01846290f9cd6c Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 5 Oct 2020 21:48:38 -0300 Subject: jbd2: introduce/export functions jbd2_journal_submit|finish_inode_data_buffers() Export functions that implement the current behavior done for an inode in journal_submit|finish_inode_data_buffers(). No functional change. 
Signed-off-by: Mauricio Faria de Oliveira Suggested-by: Jan Kara Reviewed-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201006004841.600488-2-mfo@canonical.com Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 36 ++++++++++++++++-------------------- fs/jbd2/journal.c | 2 ++ include/linux/jbd2.h | 4 ++++ 3 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6d2da8ad0e6f..f79b86b4241f 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -187,19 +187,17 @@ static int journal_wait_on_commit_record(journal_t *journal, * use writepages() because with delayed allocation we may be doing * block allocation in writepages(). */ -static int journal_submit_inode_data_buffers(struct address_space *mapping, - loff_t dirty_start, loff_t dirty_end) +int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) { - int ret; + struct address_space *mapping = jinode->i_vfs_inode->i_mapping; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = mapping->nrpages * 2, - .range_start = dirty_start, - .range_end = dirty_end, + .range_start = jinode->i_dirty_start, + .range_end = jinode->i_dirty_end, }; - ret = generic_writepages(mapping, &wbc); - return ret; + return generic_writepages(mapping, &wbc); } /* @@ -215,16 +213,11 @@ static int journal_submit_data_buffers(journal_t *journal, { struct jbd2_inode *jinode; int err, ret = 0; - struct address_space *mapping; spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { - loff_t dirty_start = jinode->i_dirty_start; - loff_t dirty_end = jinode->i_dirty_end; - if (!(jinode->i_flags & JI_WRITE_DATA)) continue; - mapping = jinode->i_vfs_inode->i_mapping; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); /* @@ -234,8 +227,7 @@ static int journal_submit_data_buffers(journal_t *journal, * only allocated blocks here. 
*/ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); - err = journal_submit_inode_data_buffers(mapping, dirty_start, - dirty_end); + err = jbd2_journal_submit_inode_data_buffers(jinode); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -248,6 +240,15 @@ static int journal_submit_data_buffers(journal_t *journal, return ret; } +int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode) +{ + struct address_space *mapping = jinode->i_vfs_inode->i_mapping; + + return filemap_fdatawait_range_keep_errors(mapping, + jinode->i_dirty_start, + jinode->i_dirty_end); +} + /* * Wait for data submitted for writeout, refile inodes to proper * transaction if needed. @@ -262,16 +263,11 @@ static int journal_finish_inode_data_buffers(journal_t *journal, /* For locking, see the comment in journal_submit_data_buffers() */ spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { - loff_t dirty_start = jinode->i_dirty_start; - loff_t dirty_end = jinode->i_dirty_end; - if (!(jinode->i_flags & JI_WAIT_DATA)) continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = filemap_fdatawait_range_keep_errors( - jinode->i_vfs_inode->i_mapping, dirty_start, - dirty_end); + err = jbd2_journal_finish_inode_data_buffers(jinode); if (!ret) ret = err; spin_lock(&journal->j_list_lock); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 17fdc482f554..c0600405e7a2 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -91,6 +91,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); EXPORT_SYMBOL(jbd2_journal_force_commit); EXPORT_SYMBOL(jbd2_journal_inode_ranged_write); EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait); +EXPORT_SYMBOL(jbd2_journal_submit_inode_data_buffers); +EXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers); EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); diff --git 
a/include/linux/jbd2.h b/include/linux/jbd2.h index a1ef05412acf..8b7b06066bc2 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1421,6 +1421,10 @@ extern int jbd2_journal_inode_ranged_write(handle_t *handle, extern int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *inode, loff_t start_byte, loff_t length); +extern int jbd2_journal_submit_inode_data_buffers( + struct jbd2_inode *jinode); +extern int jbd2_journal_finish_inode_data_buffers( + struct jbd2_inode *jinode); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, struct jbd2_inode *inode, loff_t new_size); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); -- cgit v1.2.3 From 342af94ec6c02aa478fe2adcd41b950e154b03ba Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 5 Oct 2020 21:48:39 -0300 Subject: jbd2, ext4, ocfs2: introduce/use journal callbacks j_submit|finish_inode_data_buffers() Introduce journal callbacks to allow different behaviors for an inode in journal_submit|finish_inode_data_buffers(). The existing users of the current behavior (ext4, ocfs2) are adapted to use the previously exported functions that implement the current behavior. Users are callers of jbd2_journal_inode_ranged_write|wait(), which adds the inode to the transaction's inode list with the JI_WRITE|WAIT_DATA flags. Only ext4 and ocfs2 in-tree. Both CONFIG_EXT4_FS and CONFIG_OCFS2_FS select CONFIG_JBD2, which builds fs/jbd2/commit.c and journal.c that define and export the functions, so we can call directly in ext4/ocfs2. 
Signed-off-by: Mauricio Faria de Oliveira Suggested-by: Jan Kara Reviewed-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201006004841.600488-3-mfo@canonical.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 4 ++++ fs/jbd2/commit.c | 30 ++++++++++++++++++------------ fs/ocfs2/journal.c | 4 ++++ include/linux/jbd2.h | 25 ++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0ee673e8e261..a3e57f554f1b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4752,6 +4752,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; + sbi->s_journal->j_submit_inode_data_buffers = + jbd2_journal_submit_inode_data_buffers; + sbi->s_journal->j_finish_inode_data_buffers = + jbd2_journal_finish_inode_data_buffers; no_journal: if (!test_opt(sb, NO_MBCACHE)) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f79b86b4241f..6252b4c50666 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -197,6 +197,12 @@ int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) .range_end = jinode->i_dirty_end, }; + /* + * submit the inode data buffers. We use writepage + * instead of writepages. Because writepages can do + * block allocation with delalloc. We need to write + * only allocated blocks here. + */ return generic_writepages(mapping, &wbc); } @@ -220,16 +226,13 @@ static int journal_submit_data_buffers(journal_t *journal, continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - /* - * submit the inode data buffers. We use writepage - * instead of writepages. Because writepages can do - * block allocation with delalloc. We need to write - * only allocated blocks here. - */ + /* submit the inode data buffers. 
*/ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); - err = jbd2_journal_submit_inode_data_buffers(jinode); - if (!ret) - ret = err; + if (journal->j_submit_inode_data_buffers) { + err = journal->j_submit_inode_data_buffers(jinode); + if (!ret) + ret = err; + } spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); jinode->i_flags &= ~JI_COMMIT_RUNNING; @@ -267,9 +270,12 @@ static int journal_finish_inode_data_buffers(journal_t *journal, continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = jbd2_journal_finish_inode_data_buffers(jinode); - if (!ret) - ret = err; + /* wait for the inode data buffers writeout. */ + if (journal->j_finish_inode_data_buffers) { + err = journal->j_finish_inode_data_buffers(jinode); + if (!ret) + ret = err; + } spin_lock(&journal->j_list_lock); jinode->i_flags &= ~JI_COMMIT_RUNNING; smp_mb(); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index b425f0b01dce..b9a9d69dde7e 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -883,6 +883,10 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) OCFS2_JOURNAL_DIRTY_FL); journal->j_journal = j_journal; + journal->j_journal->j_submit_inode_data_buffers = + jbd2_journal_submit_inode_data_buffers; + journal->j_journal->j_finish_inode_data_buffers = + jbd2_journal_finish_inode_data_buffers; journal->j_inode = inode; journal->j_bh = bh; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8b7b06066bc2..04afa6dcd60d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -629,7 +629,9 @@ struct transaction_s struct journal_head *t_shadow_list; /* - * List of inodes whose data we've modified in data=ordered mode. + * List of inodes associated with the transaction; e.g., ext4 uses + * this to track inodes in data=ordered and data=journal mode that + * need special handling on transaction commit; also used by ocfs2. 
* [j_list_lock] */ struct list_head t_inode_list; @@ -1111,6 +1113,27 @@ struct journal_s void (*j_commit_callback)(journal_t *, transaction_t *); + /** + * @j_submit_inode_data_buffers: + * + * This function is called for all inodes associated with the + * committing transaction marked with JI_WRITE_DATA flag + * before we start to write out the transaction to the journal. + */ + int (*j_submit_inode_data_buffers) + (struct jbd2_inode *); + + /** + * @j_finish_inode_data_buffers: + * + * This function is called for all inodes associated with the + * committing transaction marked with JI_WAIT_DATA flag + * after we have written the transaction to the journal + * but before we write out the commit block. + */ + int (*j_finish_inode_data_buffers) + (struct jbd2_inode *); + /* * Journal statistics */ -- cgit v1.2.3 From 995a3ed67fc8c0e3301a770016fb66f1bbf15ec8 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 15 Oct 2020 13:37:54 -0700 Subject: ext4: add fast_commit feature and handling for extended mount options We are running out of mount option bits. Add handling for using s_mount_opt2. Add ext4 and jbd2 fast commit feature flag and also add ability to turn off the fast commit feature in Ext4. 
Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201015203802.3597742-3-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 4 ++++ fs/ext4/super.c | 27 ++++++++++++++++++++++----- include/linux/jbd2.h | 5 ++++- 3 files changed, 30 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1879531a119f..02d7dc378505 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1213,6 +1213,8 @@ struct ext4_inode_info { #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly specified journal checksum */ +#define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */ + #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ ~EXT4_MOUNT_##opt #define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ @@ -1813,6 +1815,7 @@ static inline bool ext4_verity_in_progress(struct inode *inode) #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200 +#define EXT4_FEATURE_COMPAT_FAST_COMMIT 0x0400 #define EXT4_FEATURE_COMPAT_STABLE_INODES 0x0800 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 @@ -1915,6 +1918,7 @@ EXT4_FEATURE_COMPAT_FUNCS(xattr, EXT_ATTR) EXT4_FEATURE_COMPAT_FUNCS(resize_inode, RESIZE_INODE) EXT4_FEATURE_COMPAT_FUNCS(dir_index, DIR_INDEX) EXT4_FEATURE_COMPAT_FUNCS(sparse_super2, SPARSE_SUPER2) +EXT4_FEATURE_COMPAT_FUNCS(fast_commit, FAST_COMMIT) EXT4_FEATURE_COMPAT_FUNCS(stable_inodes, STABLE_INODES) EXT4_FEATURE_RO_COMPAT_FUNCS(sparse_super, SPARSE_SUPER) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f997fc95cc31..66423c598b70 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1709,7 +1709,7 @@ enum { Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, - Opt_prefetch_block_bitmaps, + Opt_prefetch_block_bitmaps, Opt_no_fc, }; static const 
match_table_t tokens = { @@ -1796,6 +1796,7 @@ static const match_table_t tokens = { {Opt_init_itable, "init_itable=%u"}, {Opt_init_itable, "init_itable"}, {Opt_noinit_itable, "noinit_itable"}, + {Opt_no_fc, "no_fc"}, {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, {Opt_test_dummy_encryption, "test_dummy_encryption=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, @@ -1922,6 +1923,7 @@ static int clear_qf_name(struct super_block *sb, int qtype) #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) #define MOPT_STRING 0x0400 #define MOPT_SKIP 0x0800 +#define MOPT_2 0x1000 static const struct mount_opts { int token; @@ -2022,6 +2024,8 @@ static const struct mount_opts { {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS, MOPT_SET}, + {Opt_no_fc, EXT4_MOUNT2_JOURNAL_FAST_COMMIT, + MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY}, {Opt_err, 0, 0} }; @@ -2398,10 +2402,17 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, WARN_ON(1); return -1; } - if (arg != 0) - sbi->s_mount_opt |= m->mount_opt; - else - sbi->s_mount_opt &= ~m->mount_opt; + if (m->flags & MOPT_2) { + if (arg != 0) + sbi->s_mount_opt2 |= m->mount_opt; + else + sbi->s_mount_opt2 &= ~m->mount_opt; + } else { + if (arg != 0) + sbi->s_mount_opt |= m->mount_opt; + else + sbi->s_mount_opt &= ~m->mount_opt; + } } return 1; } @@ -2618,6 +2629,9 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PUTS("dax=inode"); } + if (test_opt2(sb, JOURNAL_FAST_COMMIT)) + SEQ_OPTS_PUTS("fast_commit"); + ext4_show_quota_options(seq, sb); return 0; } @@ -4121,6 +4135,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_EXT4_FS_POSIX_ACL set_opt(sb, POSIX_ACL); #endif + if (ext4_has_feature_fast_commit(sb)) + set_opt2(sb, JOURNAL_FAST_COMMIT); /* don't forget to enable journal_csum when metadata_csum is enabled. 
*/ if (ext4_has_metadata_csum(sb)) set_opt(sb, JOURNAL_CHECKSUM); @@ -4777,6 +4793,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; clear_opt(sb, JOURNAL_CHECKSUM); clear_opt(sb, DATA_FLAGS); + clear_opt2(sb, JOURNAL_FAST_COMMIT); sbi->s_journal = NULL; needs_recovery = 0; goto no_journal; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 04afa6dcd60d..0685cc95e501 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -289,6 +289,7 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 #define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 +#define JBD2_FEATURE_INCOMPAT_FAST_COMMIT 0x00000020 /* See "journal feature predicate functions" below */ @@ -299,7 +300,8 @@ typedef struct journal_superblock_s JBD2_FEATURE_INCOMPAT_64BIT | \ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ JBD2_FEATURE_INCOMPAT_CSUM_V2 | \ - JBD2_FEATURE_INCOMPAT_CSUM_V3) + JBD2_FEATURE_INCOMPAT_CSUM_V3 | \ + JBD2_FEATURE_INCOMPAT_FAST_COMMIT) #ifdef __KERNEL__ @@ -1263,6 +1265,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT) JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) +JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) /* * Journal flag definitions -- cgit v1.2.3 From 6866d7b3f2bb4f011041ba54c98b1584497fe2fd Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 15 Oct 2020 13:37:55 -0700 Subject: ext4 / jbd2: add fast commit initialization This patch adds fast commit area trackers in the journal_t structure. These are initialized via the jbd2_fc_init() routine that this patch adds. This patch also adds ext4/fast_commit.c and ext4/fast_commit.h files for fast commit code that will be added in subsequent patches in this series. 
Reported-by: kernel test robot Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201015203802.3597742-4-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/Makefile | 2 +- fs/ext4/ext4.h | 4 ++++ fs/ext4/fast_commit.c | 20 +++++++++++++++++++ fs/ext4/fast_commit.h | 9 +++++++++ fs/ext4/super.c | 1 + fs/jbd2/journal.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++----- include/linux/jbd2.h | 39 +++++++++++++++++++++++++++++++++++++ 7 files changed, 122 insertions(+), 6 deletions(-) create mode 100644 fs/ext4/fast_commit.c create mode 100644 fs/ext4/fast_commit.h (limited to 'include/linux') diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 2e42f47a7f98..49e7af6cc93f 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -10,7 +10,7 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \ mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \ super.o symlink.o sysfs.o xattr.o xattr_hurd.o xattr_trusted.o \ - xattr_user.o + xattr_user.o fast_commit.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 02d7dc378505..2c412d32db0f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -963,6 +963,7 @@ do { \ #endif /* defined(__KERNEL__) || defined(__linux__) */ #include "extents_status.h" +#include "fast_commit.h" /* * Lock subclasses for i_data_sem in the ext4_inode_info structure. 
@@ -2678,6 +2679,9 @@ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, int barrier); extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); +/* fast_commit.c */ + +void ext4_fc_init(struct super_block *sb, journal_t *journal); /* mballoc.c */ extern const struct seq_operations ext4_mb_seq_groups_ops; extern long ext4_mb_stats; diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c new file mode 100644 index 000000000000..0dad8bdb1253 --- /dev/null +++ b/fs/ext4/fast_commit.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * fs/ext4/fast_commit.c + * + * Written by Harshad Shirwadkar + * + * Ext4 fast commits routines. + */ +#include "ext4_jbd2.h" + +void ext4_fc_init(struct super_block *sb, journal_t *journal) +{ + if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) + return; + if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) { + pr_warn("Error while enabling fast commits, turning off."); + ext4_clear_feature_fast_commit(sb); + } +} diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h new file mode 100644 index 000000000000..8362bf5e6e00 --- /dev/null +++ b/fs/ext4/fast_commit.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __FAST_COMMIT_H__ +#define __FAST_COMMIT_H__ + +/* Number of blocks in journal area to allocate for fast commits */ +#define EXT4_NUM_FC_BLKS 256 + +#endif /* __FAST_COMMIT_H__ */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 66423c598b70..41da649ccaea 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5170,6 +5170,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) journal->j_commit_interval = sbi->s_commit_interval; journal->j_min_batch_time = sbi->s_min_batch_time; journal->j_max_batch_time = sbi->s_max_batch_time; + ext4_fc_init(sb, journal); write_lock(&journal->j_state_lock); if (test_opt(sb, BARRIER)) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c0600405e7a2..4497bfbac527 100644 --- a/fs/jbd2/journal.c +++ 
b/fs/jbd2/journal.c @@ -1181,6 +1181,14 @@ static journal_t *journal_init_common(struct block_device *bdev, if (!journal->j_wbuf) goto err_cleanup; + if (journal->j_fc_wbufsize > 0) { + journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize, + sizeof(struct buffer_head *), + GFP_KERNEL); + if (!journal->j_fc_wbuf) + goto err_cleanup; + } + bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize); if (!bh) { pr_err("%s: Cannot get buffer for journal superblock\n", @@ -1194,11 +1202,23 @@ static journal_t *journal_init_common(struct block_device *bdev, err_cleanup: kfree(journal->j_wbuf); + kfree(journal->j_fc_wbuf); jbd2_journal_destroy_revoke(journal); kfree(journal); return NULL; } +int jbd2_fc_init(journal_t *journal, int num_fc_blks) +{ + journal->j_fc_wbufsize = num_fc_blks; + journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize, + sizeof(struct buffer_head *), GFP_KERNEL); + if (!journal->j_fc_wbuf) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL(jbd2_fc_init); + /* jbd2_journal_init_dev and jbd2_journal_init_inode: * * Create a journal structure assigned some fixed set of disk blocks to @@ -1316,11 +1336,20 @@ static int journal_reset(journal_t *journal) } journal->j_first = first; - journal->j_last = last; - journal->j_head = first; - journal->j_tail = first; - journal->j_free = last - first; + if (jbd2_has_feature_fast_commit(journal) && + journal->j_fc_wbufsize > 0) { + journal->j_fc_last = last; + journal->j_last = last - journal->j_fc_wbufsize; + journal->j_fc_first = journal->j_last + 1; + journal->j_fc_off = 0; + } else { + journal->j_last = last; + } + + journal->j_head = journal->j_first; + journal->j_tail = journal->j_first; + journal->j_free = journal->j_last - journal->j_first; journal->j_tail_sequence = journal->j_transaction_sequence; journal->j_commit_sequence = journal->j_transaction_sequence - 1; @@ -1665,9 +1694,18 @@ static int load_superblock(journal_t *journal) journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); 
journal->j_tail = be32_to_cpu(sb->s_start); journal->j_first = be32_to_cpu(sb->s_first); - journal->j_last = be32_to_cpu(sb->s_maxlen); journal->j_errno = be32_to_cpu(sb->s_errno); + if (jbd2_has_feature_fast_commit(journal) && + journal->j_fc_wbufsize > 0) { + journal->j_fc_last = be32_to_cpu(sb->s_maxlen); + journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize; + journal->j_fc_first = journal->j_last + 1; + journal->j_fc_off = 0; + } else { + journal->j_last = be32_to_cpu(sb->s_maxlen); + } + return 0; } @@ -1728,6 +1766,9 @@ int jbd2_journal_load(journal_t *journal) */ journal->j_flags &= ~JBD2_ABORT; + if (journal->j_fc_wbufsize > 0) + jbd2_journal_set_features(journal, 0, 0, + JBD2_FEATURE_INCOMPAT_FAST_COMMIT); /* OK, we've finished with the dynamic journal bits: * reinitialise the dynamic contents of the superblock in memory * and reset them on disk. */ @@ -1811,6 +1852,8 @@ int jbd2_journal_destroy(journal_t *journal) jbd2_journal_destroy_revoke(journal); if (journal->j_chksum_driver) crypto_free_shash(journal->j_chksum_driver); + if (journal->j_fc_wbufsize > 0) + kfree(journal->j_fc_wbuf); kfree(journal->j_wbuf); kfree(journal); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0685cc95e501..008629b4d615 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -918,6 +918,30 @@ struct journal_s */ unsigned long j_last; + /** + * @j_fc_first: + * + * The block number of the first fast commit block in the journal + * [j_state_lock]. + */ + unsigned long j_fc_first; + + /** + * @j_fc_off: + * + * Number of fast commit blocks currently allocated. + * [j_state_lock]. + */ + unsigned long j_fc_off; + + /** + * @j_fc_last: + * + * The block number one beyond the last fast commit block in the journal + * [j_state_lock]. + */ + unsigned long j_fc_last; + /** * @j_dev: Device where we store the journal. 
*/ @@ -1068,6 +1092,12 @@ struct journal_s */ struct buffer_head **j_wbuf; + /** + * @j_fc_wbuf: Array of fast commit bhs for + * jbd2_journal_commit_transaction. + */ + struct buffer_head **j_fc_wbuf; + /** * @j_wbufsize: * @@ -1075,6 +1105,13 @@ struct journal_s */ int j_wbufsize; + /** + * @j_fc_wbufsize: + * + * Size of @j_fc_wbuf array. + */ + int j_fc_wbufsize; + /** * @j_last_sync_writer: * @@ -1535,6 +1572,8 @@ void __jbd2_log_wait_for_space(journal_t *journal); extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); extern int jbd2_cleanup_journal_tail(journal_t *); +/* Fast commit related APIs */ +int jbd2_fc_init(journal_t *journal, int num_fc_blks); /* * is_journal_abort * -- cgit v1.2.3 From ff780b91efe901b8eecd8114785abae5341820ad Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 15 Oct 2020 13:37:56 -0700 Subject: jbd2: add fast commit machinery This patch adds the necessary APIs needed in the JBD2 layer for fast commits. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201015203802.3597742-5-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 8 +++ fs/jbd2/commit.c | 44 ++++++++++++ fs/jbd2/journal.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/jbd2.h | 27 +++++++ 4 files changed, 268 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 0dad8bdb1253..f2d11b4c6b62 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -8,11 +8,19 @@ * Ext4 fast commits routines. */ #include "ext4_jbd2.h" +/* + * Fast commit cleanup routine. This is called after every fast commit and + * full commit. full is true if we are called after a full commit. 
+ */ +static void ext4_fc_cleanup(journal_t *journal, int full) +{ +} void ext4_fc_init(struct super_block *sb, journal_t *journal) { if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) return; + journal->j_fc_cleanup_callback = ext4_fc_cleanup; if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) { pr_warn("Error while enabling fast commits, turning off."); ext4_clear_feature_fast_commit(sb); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6252b4c50666..fa688e163a80 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -206,6 +206,30 @@ int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) return generic_writepages(mapping, &wbc); } +/* Send all the data buffers related to an inode */ +int jbd2_submit_inode_data(struct jbd2_inode *jinode) +{ + + if (!jinode || !(jinode->i_flags & JI_WRITE_DATA)) + return 0; + + trace_jbd2_submit_inode_data(jinode->i_vfs_inode); + return jbd2_journal_submit_inode_data_buffers(jinode); + +} +EXPORT_SYMBOL(jbd2_submit_inode_data); + +int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode) +{ + if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) || + !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping) + return 0; + return filemap_fdatawait_range_keep_errors( + jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start, + jinode->i_dirty_end); +} +EXPORT_SYMBOL(jbd2_wait_inode_data); + /* * Submit all the data buffers of inode associated with the transaction to * disk. 
@@ -415,6 +439,20 @@ void jbd2_journal_commit_transaction(journal_t *journal) J_ASSERT(journal->j_running_transaction != NULL); J_ASSERT(journal->j_committing_transaction == NULL); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_FULL_COMMIT_ONGOING; + while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_fc_wait, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock(&journal->j_state_lock); + schedule(); + write_lock(&journal->j_state_lock); + finish_wait(&journal->j_fc_wait, &wait); + } + write_unlock(&journal->j_state_lock); + commit_transaction = journal->j_running_transaction; trace_jbd2_start_commit(journal, commit_transaction); @@ -422,6 +460,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction->t_tid); write_lock(&journal->j_state_lock); + journal->j_fc_off = 0; J_ASSERT(commit_transaction->t_state == T_RUNNING); commit_transaction->t_state = T_LOCKED; @@ -1121,12 +1160,16 @@ restart_loop: if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); + if (journal->j_fc_cleanup_callback) + journal->j_fc_cleanup_callback(journal, 1); trace_jbd2_end_commit(journal, commit_transaction); jbd_debug(1, "JBD2: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING; + journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; spin_lock(&journal->j_list_lock); commit_transaction->t_state = T_FINISHED; /* Check if the transaction can be dropped now that we are finished */ @@ -1138,6 +1181,7 @@ restart_loop: spin_unlock(&journal->j_list_lock); write_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_done_commit); + wake_up(&journal->j_fc_wait); /* * Calculate overall stats diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 4497bfbac527..0c7c42bd530f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -159,7 +159,9 @@ static void 
commit_timeout(struct timer_list *t) * * 1) COMMIT: Every so often we need to commit the current state of the * filesystem to disk. The journal thread is responsible for writing - * all of the metadata buffers to disk. + * all of the metadata buffers to disk. If a fast commit is ongoing + * journal thread waits until it's done and then continues from + * there on. * * 2) CHECKPOINT: We cannot reuse a used section of the log file until all * of the data in that part of the log has been rewritten elsewhere on @@ -716,6 +718,75 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) return err; } +/* + * Start a fast commit. If there's an ongoing fast or full commit wait for + * it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY + * if a fast commit is not needed, either because there's already a commit + * going on or this tid has already been committed. Returns -EINVAL if no jbd2 + * commit has yet been performed. + */ +int jbd2_fc_begin_commit(journal_t *journal, tid_t tid) +{ + /* + * Fast commits only allowed if at least one full commit has + * been processed. + */ + if (!journal->j_stats.ts_tid) + return -EINVAL; + + /* tids wrap around, so compare with tid_geq() instead of a plain <= */ + if (tid_geq(journal->j_commit_sequence, tid)) + return -EALREADY; + + write_lock(&journal->j_state_lock); + if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING || + (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_fc_wait, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock(&journal->j_state_lock); + schedule(); + finish_wait(&journal->j_fc_wait, &wait); + return -EALREADY; + } + journal->j_flags |= JBD2_FAST_COMMIT_ONGOING; + write_unlock(&journal->j_state_lock); + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_begin_commit); + +/* + * Stop a fast commit. If fallback is set, this function starts commit of + * TID tid before any other fast commit can start.
+ */ +static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) +{ + if (journal->j_fc_cleanup_callback) + journal->j_fc_cleanup_callback(journal, 0); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; + if (fallback) + journal->j_flags |= JBD2_FULL_COMMIT_ONGOING; + write_unlock(&journal->j_state_lock); + wake_up(&journal->j_fc_wait); + if (fallback) + return jbd2_complete_transaction(journal, tid); + return 0; +} + +int jbd2_fc_end_commit(journal_t *journal) +{ + return __jbd2_fc_end_commit(journal, 0, 0); +} +EXPORT_SYMBOL(jbd2_fc_end_commit); + +int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid) +{ + return __jbd2_fc_end_commit(journal, tid, 1); +} +EXPORT_SYMBOL(jbd2_fc_end_commit_fallback); + /* Return 1 when transaction with given tid has already committed. */ int jbd2_transaction_committed(journal_t *journal, tid_t tid) { @@ -784,6 +855,110 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) return jbd2_journal_bmap(journal, blocknr, retp); } +/* Map one fast commit buffer for use by the file system */ +int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out) +{ + unsigned long long pblock; + unsigned long blocknr; + int ret = 0; + struct buffer_head *bh; + int fc_off; + + *bh_out = NULL; + write_lock(&journal->j_state_lock); + + if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) { + fc_off = journal->j_fc_off; + blocknr = journal->j_fc_first + fc_off; + journal->j_fc_off++; + } else { + ret = -EINVAL; + } + write_unlock(&journal->j_state_lock); + + if (ret) + return ret; + + ret = jbd2_journal_bmap(journal, blocknr, &pblock); + if (ret) + return ret; + + bh = __getblk(journal->j_dev, pblock, journal->j_blocksize); + if (!bh) + return -ENOMEM; + + lock_buffer(bh); + + clear_buffer_uptodate(bh); + set_buffer_dirty(bh); + unlock_buffer(bh); + journal->j_fc_wbuf[fc_off] = bh; + + *bh_out = bh; + + return 0; +} 
+EXPORT_SYMBOL(jbd2_fc_get_buf); + +/* + * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf + * for completion. Waits on the last num_blks buffers handed out, drops the + * reference recorded in j_fc_wbuf for each of them and returns -EIO as soon + * as one is found not uptodate after waiting; returns 0 otherwise. + */ +int jbd2_fc_wait_bufs(journal_t *journal, int num_blks) +{ + struct buffer_head *bh; + int i, j_fc_off, uptodate; + + read_lock(&journal->j_state_lock); + j_fc_off = journal->j_fc_off; + read_unlock(&journal->j_state_lock); + + /* + * Wait in reverse order to minimize chances of us being woken up before + * all IOs have completed + */ + for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) { + bh = journal->j_fc_wbuf[i]; + wait_on_buffer(bh); + /* + * Sample the I/O result before put_bh(): once our reference is + * dropped, bh must not be dereferenced any more. + */ + uptodate = buffer_uptodate(bh); + put_bh(bh); + journal->j_fc_wbuf[i] = NULL; + if (unlikely(!uptodate)) + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_wait_bufs); + +/* + * Release the fast commit buffers that were allocated by jbd2_fc_get_buf + * and are still recorded in j_fc_wbuf. Always returns 0. + */ +int jbd2_fc_release_bufs(journal_t *journal) +{ + struct buffer_head *bh; + int i, j_fc_off; + + read_lock(&journal->j_state_lock); + j_fc_off = journal->j_fc_off; + read_unlock(&journal->j_state_lock); + + /* + * Walk down from the most recently allocated slot and stop at the + * first empty one. + */ + for (i = j_fc_off - 1; i >= 0; i--) { + bh = journal->j_fc_wbuf[i]; + if (!bh) + break; + put_bh(bh); + journal->j_fc_wbuf[i] = NULL; + } + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_release_bufs); + /* * Conversion of logical to physical block numbers for the journal * @@ -1142,6 +1317,7 @@ static journal_t *journal_init_common(struct block_device *bdev, init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); init_waitqueue_head(&journal->j_wait_reserved); + init_waitqueue_head(&journal->j_fc_wait); mutex_init(&journal->j_abort_mutex); mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); @@ -1495,6 +1671,7 @@ out: static void jbd2_mark_journal_empty(journal_t *journal, int write_op) { journal_superblock_t *sb = journal->j_superblock; + bool had_fast_commit = false;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); lock_buffer(journal->j_sb_buffer); @@ -1508,9 +1685,20 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op) sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); sb->s_start = cpu_to_be32(0); + if (jbd2_has_feature_fast_commit(journal)) { + /* + * When journal is clean, no need to commit fast commit flag and + * make file system incompatible with older kernels. + */ + jbd2_clear_feature_fast_commit(journal); + had_fast_commit = true; + } jbd2_write_superblock(journal, write_op); + if (had_fast_commit) + jbd2_set_feature_fast_commit(journal); + /* Log is no longer empty */ write_lock(&journal->j_state_lock); journal->j_flags |= JBD2_FLUSHED; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 008629b4d615..a009d9b9c620 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -861,6 +861,13 @@ struct journal_s */ wait_queue_head_t j_wait_reserved; + /** + * @j_fc_wait: + * + * Wait queue to wait for completion of async fast commits. + */ + wait_queue_head_t j_fc_wait; + /** * @j_checkpoint_mutex: * @@ -1232,6 +1239,15 @@ struct journal_s */ struct lockdep_map j_trans_commit_map; #endif + + /** + * @j_fc_cleanup_callback: + * + * Clean-up after fast commit or full commit. JBD2 calls this function + * after every commit operation. 
+ */ + void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + }; #define jbd2_might_wait_for_commit(j) \ @@ -1316,6 +1332,8 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */ +#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */ /* * Function declarations for the journaling transaction and buffer @@ -1574,6 +1592,15 @@ extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ int jbd2_fc_init(journal_t *journal, int num_fc_blks); +int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); +int jbd2_fc_end_commit(journal_t *journal); +int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); +int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); +int jbd2_submit_inode_data(struct jbd2_inode *jinode); +int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); +int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); +int jbd2_fc_release_bufs(journal_t *journal); + /* * is_journal_abort * -- cgit v1.2.3 From 5b849b5f96b47d82b5a432d8b91a8ad260e1de46 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 15 Oct 2020 13:37:58 -0700 Subject: jbd2: fast commit recovery path This patch adds fast commit recovery support in JBD2. 
Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201015203802.3597742-7-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 15 ++++++++++++++ fs/jbd2/recovery.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++---- include/linux/jbd2.h | 20 ++++++++++++++++++ 3 files changed, 88 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 79e947c43198..888d9d217d5b 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1188,8 +1188,23 @@ static void ext4_fc_cleanup(journal_t *journal, int full) trace_ext4_fc_stats(sb); } +/* + * Main recovery path entry point. + */ +static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, + enum passtype pass, int off, tid_t expected_tid) +{ + return 0; +} + void ext4_fc_init(struct super_block *sb, journal_t *journal) { + /* + * We set replay callback even if fast commit is disabled because we + * could still have fast commit blocks that need to be replayed even if + * fast commit has now been turned off. + */ + journal->j_fc_replay_callback = ext4_fc_replay; if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) return; journal->j_fc_cleanup_callback = ext4_fc_cleanup; diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index fb134c7a12c8..eb2606133cd8 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -35,7 +35,6 @@ struct recovery_info int nr_revoke_hits; }; -enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass); static int scan_revoke_records(journal_t *, struct buffer_head *, @@ -225,10 +224,51 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) /* Make sure we wrap around the log correctly!
*/ #define wrap(journal, var) \ do { \ - if (var >= (journal)->j_last) \ - var -= ((journal)->j_last - (journal)->j_first); \ + unsigned long _wrap_last = \ + jbd2_has_feature_fast_commit(journal) ? \ + (journal)->j_fc_last : (journal)->j_last; \ + \ + if (var >= _wrap_last) \ + var -= (_wrap_last - (journal)->j_first); \ } while (0) +/* + * Hand every block of the fast commit area to the file system's replay + * callback for the given pass. Stops at the first read error, negative + * callback return or JBD2_FC_REPLAY_STOP. Returns 0 on success (or when no + * callback is registered) and a negative error otherwise. + */ +static int fc_do_one_pass(journal_t *journal, + struct recovery_info *info, enum passtype pass) +{ + unsigned int expected_commit_id = info->end_transaction; + unsigned long next_fc_block; + struct buffer_head *bh; + int err = 0; + + next_fc_block = journal->j_fc_first; + if (!journal->j_fc_replay_callback) + return 0; + + while (next_fc_block <= journal->j_fc_last) { + jbd_debug(3, "Fast commit replay: next block %ld", + next_fc_block); + err = jread(&bh, journal, next_fc_block); + if (err) { + jbd_debug(3, "Fast commit replay: read error"); + break; + } + + jbd_debug(3, "Processing fast commit blk with seq %u", + expected_commit_id); + err = journal->j_fc_replay_callback(journal, bh, pass, + next_fc_block - journal->j_fc_first, + expected_commit_id); + /* Drop the buffer reference jread() handed to us. */ + brelse(bh); + next_fc_block++; + if (err < 0 || err == JBD2_FC_REPLAY_STOP) + break; + err = 0; + } + + if (err) + jbd_debug(3, "Fast commit replay failed, err = %d\n", err); + + return err; +} + /** * jbd2_journal_recover - recovers a on-disk journal * @journal: the journal to recover @@ -472,7 +512,9 @@ static int do_one_pass(journal_t *journal, break; jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", - next_commit_ID, next_log_block, journal->j_last); + next_commit_ID, next_log_block, + jbd2_has_feature_fast_commit(journal) ?
+ journal->j_fc_last : journal->j_last); /* Skip over each chunk of the transaction looking * either the next descriptor block or the final commit @@ -834,6 +876,13 @@ static int do_one_pass(journal_t *journal, success = -EIO; } } + + if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) { + err = fc_do_one_pass(journal, info, pass); + if (err) + success = err; + } + if (block_error && success == 0) success = -EIO; return success; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a009d9b9c620..fb3d71ad6eea 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -751,6 +751,11 @@ jbd2_time_diff(unsigned long start, unsigned long end) #define JBD2_NR_BATCH 64 +enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; + +#define JBD2_FC_REPLAY_STOP 0 +#define JBD2_FC_REPLAY_CONTINUE 1 + /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. @@ -1248,6 +1253,21 @@ struct journal_s */ void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + /** + * @j_fc_replay_callback: + * + * File-system specific function that performs replay of a fast + * commit. JBD2 calls this function for each fast commit block found in + * the journal. This function should return JBD2_FC_REPLAY_CONTINUE + * to indicate that the block was processed correctly and more fast + * commit replay should continue. Return value of JBD2_FC_REPLAY_STOP + * indicates the end of replay (no more blocks remaining). A negative + * return value indicates error. + */ + int (*j_fc_replay_callback)(struct journal_s *journal, + struct buffer_head *bh, + enum passtype pass, int off, + tid_t expected_commit_id); }; #define jbd2_might_wait_for_commit(j) \ -- cgit v1.2.3