From a9c667f8f0656631ee5438baaf21bf30d5f67375 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 6 Jun 2011 09:51:52 -0400 Subject: ext4: fixed tracepoints cleanup While creating fixed tracepoints for ext3, basically by porting them from ext4, I found a lot of useless retyping, wrong type usage, useless variable passing and other inconsistencies in the ext4 fixed tracepoint code. This patch cleans the fixed tracepoint code for ext4 and also simplify some of them. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 179 +++++++++++++++++++------------------------- 1 file changed, 76 insertions(+), 103 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index e09592d2f916..5ce2b2f5f524 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -26,7 +26,7 @@ TRACE_EVENT(ext4_free_inode, __field( umode_t, mode ) __field( uid_t, uid ) __field( gid_t, gid ) - __field( blkcnt_t, blocks ) + __field( __u64, blocks ) ), TP_fast_assign( @@ -40,9 +40,8 @@ TRACE_EVENT(ext4_free_inode, TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->mode, __entry->uid, __entry->gid, - (unsigned long long) __entry->blocks) + (unsigned long) __entry->ino, __entry->mode, + __entry->uid, __entry->gid, __entry->blocks) ); TRACE_EVENT(ext4_request_inode, @@ -178,7 +177,7 @@ TRACE_EVENT(ext4_begin_ordered_truncate, TP_printk("dev %d,%d ino %lu new_size %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (long long) __entry->new_size) + __entry->new_size) ); DECLARE_EVENT_CLASS(ext4__write_begin, @@ -204,7 +203,7 @@ DECLARE_EVENT_CLASS(ext4__write_begin, __entry->flags = flags; ), - TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u", + TP_printk("dev %d,%d ino %lu pos %lld len %u flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) 
__entry->ino, __entry->pos, __entry->len, __entry->flags) @@ -248,7 +247,7 @@ DECLARE_EVENT_CLASS(ext4__write_end, __entry->copied = copied; ), - TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u", + TP_printk("dev %d,%d ino %lu pos %lld len %u copied %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->copied) @@ -286,29 +285,6 @@ DEFINE_EVENT(ext4__write_end, ext4_da_write_end, TP_ARGS(inode, pos, len, copied) ); -TRACE_EVENT(ext4_writepage, - TP_PROTO(struct inode *inode, struct page *page), - - TP_ARGS(inode, page), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( pgoff_t, index ) - - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->index = page->index; - ), - - TP_printk("dev %d,%d ino %lu page_index %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->index) -); - TRACE_EVENT(ext4_da_writepages, TP_PROTO(struct inode *inode, struct writeback_control *wbc), @@ -341,7 +317,7 @@ TRACE_EVENT(ext4_da_writepages, ), TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld " - "range_start %llu range_end %llu sync_mode %d" + "range_start %lld range_end %lld sync_mode %d" "for_kupdate %d range_cyclic %d writeback_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->nr_to_write, @@ -449,7 +425,14 @@ DECLARE_EVENT_CLASS(ext4__page_op, TP_printk("dev %d,%d ino %lu page_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->index) + (unsigned long) __entry->index) +); + +DEFINE_EVENT(ext4__page_op, ext4_writepage, + + TP_PROTO(struct page *page), + + TP_ARGS(page) ); DEFINE_EVENT(ext4__page_op, ext4_readpage, @@ -489,7 +472,7 @@ TRACE_EVENT(ext4_invalidatepage, TP_printk("dev %d,%d ino %lu page_index %lu offset %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - 
__entry->index, __entry->offset) + (unsigned long) __entry->index, __entry->offset) ); TRACE_EVENT(ext4_discard_blocks, @@ -562,12 +545,10 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa, ); TRACE_EVENT(ext4_mb_release_inode_pa, - TP_PROTO(struct super_block *sb, - struct inode *inode, - struct ext4_prealloc_space *pa, + TP_PROTO(struct ext4_prealloc_space *pa, unsigned long long block, unsigned int count), - TP_ARGS(sb, inode, pa, block, count), + TP_ARGS(pa, block, count), TP_STRUCT__entry( __field( dev_t, dev ) @@ -578,8 +559,8 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ), TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->ino = inode->i_ino; + __entry->dev = pa->pa_inode->i_sb->s_dev; + __entry->ino = pa->pa_inode->i_ino; __entry->block = block; __entry->count = count; ), @@ -591,10 +572,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ); TRACE_EVENT(ext4_mb_release_group_pa, - TP_PROTO(struct super_block *sb, - struct ext4_prealloc_space *pa), + TP_PROTO(struct ext4_prealloc_space *pa), - TP_ARGS(sb, pa), + TP_ARGS(pa), TP_STRUCT__entry( __field( dev_t, dev ) @@ -604,7 +584,7 @@ TRACE_EVENT(ext4_mb_release_group_pa, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev = pa->pa_inode->i_sb->s_dev; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), @@ -666,10 +646,10 @@ TRACE_EVENT(ext4_request_blocks, __field( ino_t, ino ) __field( unsigned int, flags ) __field( unsigned int, len ) - __field( __u64, logical ) + __field( __u32, logical ) + __field( __u32, lleft ) + __field( __u32, lright ) __field( __u64, goal ) - __field( __u64, lleft ) - __field( __u64, lright ) __field( __u64, pleft ) __field( __u64, pright ) ), @@ -687,17 +667,13 @@ TRACE_EVENT(ext4_request_blocks, __entry->pright = ar->pright; ), - TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu " - "lleft %llu lright %llu pleft %llu pright %llu ", + TP_printk("dev %d,%d ino %lu flags %u len %u lblk %u goal %llu " + "lleft %u lright %u pleft %llu pright %llu ", 
MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->flags, __entry->len, - (unsigned long long) __entry->logical, - (unsigned long long) __entry->goal, - (unsigned long long) __entry->lleft, - (unsigned long long) __entry->lright, - (unsigned long long) __entry->pleft, - (unsigned long long) __entry->pright) + (unsigned long) __entry->ino, __entry->flags, + __entry->len, __entry->logical, __entry->goal, + __entry->lleft, __entry->lright, __entry->pleft, + __entry->pright) ); TRACE_EVENT(ext4_allocate_blocks, @@ -711,10 +687,10 @@ TRACE_EVENT(ext4_allocate_blocks, __field( __u64, block ) __field( unsigned int, flags ) __field( unsigned int, len ) - __field( __u64, logical ) + __field( __u32, logical ) + __field( __u32, lleft ) + __field( __u32, lright ) __field( __u64, goal ) - __field( __u64, lleft ) - __field( __u64, lright ) __field( __u64, pleft ) __field( __u64, pright ) ), @@ -733,17 +709,13 @@ TRACE_EVENT(ext4_allocate_blocks, __entry->pright = ar->pright; ), - TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu " - "goal %llu lleft %llu lright %llu pleft %llu pright %llu", + TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %u " + "goal %llu lleft %u lright %u pleft %llu pright %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->flags, __entry->len, __entry->block, - (unsigned long long) __entry->logical, - (unsigned long long) __entry->goal, - (unsigned long long) __entry->lleft, - (unsigned long long) __entry->lright, - (unsigned long long) __entry->pleft, - (unsigned long long) __entry->pright) + (unsigned long) __entry->ino, __entry->flags, + __entry->len, __entry->block, __entry->logical, + __entry->goal, __entry->lleft, __entry->lright, + __entry->pleft, __entry->pright) ); TRACE_EVENT(ext4_free_blocks, @@ -755,10 +727,10 @@ TRACE_EVENT(ext4_free_blocks, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( 
umode_t, mode ) __field( __u64, block ) __field( unsigned long, count ) - __field( int, flags ) + __field( int, flags ) ), TP_fast_assign( @@ -798,7 +770,7 @@ TRACE_EVENT(ext4_sync_file_enter, __entry->parent = dentry->d_parent->d_inode->i_ino; ), - TP_printk("dev %d,%d ino %ld parent %ld datasync %d ", + TP_printk("dev %d,%d ino %lu parent %lu datasync %d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->parent, __entry->datasync) @@ -821,7 +793,7 @@ TRACE_EVENT(ext4_sync_file_exit, __entry->dev = inode->i_sb->s_dev; ), - TP_printk("dev %d,%d ino %ld ret %d", + TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) @@ -1005,7 +977,7 @@ DECLARE_EVENT_CLASS(ext4__mballoc, __entry->result_len = len; ), - TP_printk("dev %d,%d inode %lu extent %u/%d/%u ", + TP_printk("dev %d,%d inode %lu extent %u/%d/%d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->result_group, __entry->result_start, @@ -1093,7 +1065,7 @@ TRACE_EVENT(ext4_da_update_reserve_space, "allocated_meta_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->mode, __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) ); @@ -1127,7 +1099,7 @@ TRACE_EVENT(ext4_da_reserve_space, "reserved_data_blocks %d reserved_meta_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->mode, __entry->i_blocks, __entry->md_needed, __entry->reserved_data_blocks, __entry->reserved_meta_blocks) ); @@ -1164,7 +1136,7 @@ TRACE_EVENT(ext4_da_release_space, "allocated_meta_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, 
+ __entry->mode, __entry->i_blocks, __entry->freed_blocks, __entry->reserved_data_blocks, __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) ); @@ -1239,14 +1211,15 @@ TRACE_EVENT(ext4_direct_IO_enter, __entry->rw = rw; ), - TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d", + TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->len, __entry->rw) + __entry->pos, __entry->len, __entry->rw) ); TRACE_EVENT(ext4_direct_IO_exit, - TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw, int ret), + TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, + int rw, int ret), TP_ARGS(inode, offset, len, rw, ret), @@ -1268,10 +1241,10 @@ TRACE_EVENT(ext4_direct_IO_exit, __entry->ret = ret; ), - TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d", + TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->len, + __entry->pos, __entry->len, __entry->rw, __entry->ret) ); @@ -1296,15 +1269,15 @@ TRACE_EVENT(ext4_fallocate_enter, __entry->mode = mode; ), - TP_printk("dev %d,%d ino %ld pos %llu len %llu mode %d", + TP_printk("dev %d,%d ino %lu pos %lld len %lld mode %d", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, - (unsigned long long) __entry->len, __entry->mode) + (unsigned long) __entry->ino, __entry->pos, + __entry->len, __entry->mode) ); TRACE_EVENT(ext4_fallocate_exit, - TP_PROTO(struct inode *inode, loff_t offset, unsigned int max_blocks, int ret), + TP_PROTO(struct inode *inode, loff_t offset, + unsigned int max_blocks, int ret), TP_ARGS(inode, offset, max_blocks, ret), @@ -1312,7 +1285,7 @@ TRACE_EVENT(ext4_fallocate_exit, __field( ino_t, ino ) __field( dev_t, dev ) __field( loff_t, pos ) - __field( unsigned, blocks ) + 
__field( unsigned int, blocks ) __field( int, ret ) ), @@ -1324,10 +1297,10 @@ TRACE_EVENT(ext4_fallocate_exit, __entry->ret = ret; ), - TP_printk("dev %d,%d ino %ld pos %llu blocks %d ret %d", + TP_printk("dev %d,%d ino %lu pos %lld blocks %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->blocks, + __entry->pos, __entry->blocks, __entry->ret) ); @@ -1350,7 +1323,7 @@ TRACE_EVENT(ext4_unlink_enter, __entry->dev = dentry->d_inode->i_sb->s_dev; ), - TP_printk("dev %d,%d ino %ld size %lld parent %ld", + TP_printk("dev %d,%d ino %lu size %lld parent %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->size, (unsigned long) __entry->parent) @@ -1373,7 +1346,7 @@ TRACE_EVENT(ext4_unlink_exit, __entry->ret = ret; ), - TP_printk("dev %d,%d ino %ld ret %d", + TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) @@ -1387,7 +1360,7 @@ DECLARE_EVENT_CLASS(ext4__truncate, TP_STRUCT__entry( __field( ino_t, ino ) __field( dev_t, dev ) - __field( blkcnt_t, blocks ) + __field( __u64, blocks ) ), TP_fast_assign( @@ -1396,9 +1369,9 @@ DECLARE_EVENT_CLASS(ext4__truncate, __entry->blocks = inode->i_blocks; ), - TP_printk("dev %d,%d ino %lu blocks %lu", + TP_printk("dev %d,%d ino %lu blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, (unsigned long) __entry->blocks) + (unsigned long) __entry->ino, __entry->blocks) ); DEFINE_EVENT(ext4__truncate, ext4_truncate_enter, @@ -1417,7 +1390,7 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit, DECLARE_EVENT_CLASS(ext4__map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, - unsigned len, unsigned flags), + unsigned int len, unsigned int flags), TP_ARGS(inode, lblk, len, flags), @@ -1425,8 +1398,8 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter, __field( ino_t, ino ) __field( dev_t, dev ) __field( ext4_lblk_t, lblk ) 
- __field( unsigned, len ) - __field( unsigned, flags ) + __field( unsigned int, len ) + __field( unsigned int, flags ) ), TP_fast_assign( @@ -1440,7 +1413,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter, TP_printk("dev %d,%d ino %lu lblk %u len %u flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned) __entry->lblk, __entry->len, __entry->flags) + __entry->lblk, __entry->len, __entry->flags) ); DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter, @@ -1459,7 +1432,7 @@ DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter, DECLARE_EVENT_CLASS(ext4__map_blocks_exit, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, - ext4_fsblk_t pblk, unsigned len, int ret), + ext4_fsblk_t pblk, unsigned int len, int ret), TP_ARGS(inode, lblk, pblk, len, ret), @@ -1468,7 +1441,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_exit, __field( dev_t, dev ) __field( ext4_lblk_t, lblk ) __field( ext4_fsblk_t, pblk ) - __field( unsigned, len ) + __field( unsigned int, len ) __field( int, ret ) ), @@ -1484,7 +1457,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_exit, TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, + __entry->lblk, __entry->pblk, __entry->len, __entry->ret) ); @@ -1524,7 +1497,7 @@ TRACE_EVENT(ext4_ext_load_extent, TP_printk("dev %d,%d ino %lu lblk %u pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned) __entry->lblk, (unsigned long long) __entry->pblk) + __entry->lblk, __entry->pblk) ); TRACE_EVENT(ext4_load_inode, -- cgit v1.2.3 From de1b794130b130e77ffa975bb58cb843744f9ae5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jun 2011 15:38:22 -0400 Subject: jbd2: Fix oops in jbd2_journal_remove_journal_head() jbd2_journal_remove_journal_head() can oops when trying to access journal_head returned by bh2jh(). 
This is caused for example by the following race:

  TASK1                                        TASK2
  jbd2_journal_commit_transaction()
    ...
    processing t_forget list
      __jbd2_journal_refile_buffer(jh);
      if (!jh->b_transaction) {
        jbd_unlock_bh_state(bh);
                                               jbd2_journal_try_to_free_buffers()
                                                 jbd2_journal_grab_journal_head(bh)
                                                 jbd_lock_bh_state(bh)
                                                 __journal_try_to_free_buffer()
                                                 jbd2_journal_put_journal_head(jh)
        jbd2_journal_remove_journal_head(bh);

jbd2_journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not part of any transaction and thus frees journal_head before TASK1 gets to doing so. Note that even buffer_head can be released by try_to_free_buffers() after jbd2_journal_put_journal_head() which adds an even larger opportunity for an oops (but I didn't see this happen in reality).

Fix the problem by making transactions hold their own journal_head reference (in b_jcount). That way we don't have to remove journal_head explicitly via jbd2_journal_remove_journal_head() and instead just remove journal_head when b_jcount drops to zero. The result of this is that [__]jbd2_journal_refile_buffer(), [__]jbd2_journal_unfile_buffer(), and __jbd2_journal_remove_checkpoint() can free journal_head which needs modification of a few callers. Also we have to be careful because once journal_head is removed, buffer_head might be freed as well. So we have to get our own buffer_head reference where it matters.
Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 28 +++++++++------- fs/jbd2/commit.c | 33 +++++++++++-------- fs/jbd2/journal.c | 91 ++++++++++++++++----------------------------------- fs/jbd2/transaction.c | 67 +++++++++++++++++++------------------ include/linux/jbd2.h | 2 -- 5 files changed, 99 insertions(+), 122 deletions(-) (limited to 'include') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6a79fd0a1a32..2c62c5aae82f 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh) && !buffer_write_io_error(bh)) { + /* + * Get our reference so that bh cannot be freed before + * we unlock it + */ + get_bh(bh); JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __jbd2_journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); } else { @@ -223,8 +227,8 @@ restart: spin_lock(&journal->j_list_lock); goto restart; } + get_bh(bh); if (buffer_locked(bh)) { - atomic_inc(&bh->b_count); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); wait_on_buffer(bh); @@ -243,7 +247,6 @@ restart: */ released = __jbd2_journal_remove_checkpoint(jh); jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); __brelse(bh); } @@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, int ret = 0; if (buffer_locked(bh)) { - atomic_inc(&bh->b_count); + get_bh(bh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); wait_on_buffer(bh); @@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, ret = 1; if (unlikely(buffer_write_io_error(bh))) ret = -EIO; + get_bh(bh); J_ASSERT_JH(jh, !buffer_jbddirty(bh)); BUFFER_TRACE(bh, "remove from checkpoint"); __jbd2_journal_remove_checkpoint(jh); 
spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); __brelse(bh); } else { /* @@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) /* * journal_clean_one_cp_list * - * Find all the written-back checkpoint buffers in the given list and release them. + * Find all the written-back checkpoint buffers in the given list and + * release them. * * Called with the journal locked. * Called with j_list_lock held. @@ -663,8 +667,8 @@ out: * checkpoint lists. * * The function returns 1 if it frees the transaction, 0 otherwise. + * The function can free jh and bh. * - * This function is called with the journal locked. * This function is called with j_list_lock held. * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ @@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) } journal = transaction->t_journal; + JBUFFER_TRACE(jh, "removing from transaction"); __buffer_unlink(jh); jh->b_cp_transaction = NULL; + jbd2_journal_put_journal_head(jh); if (transaction->t_checkpoint_list != NULL || transaction->t_checkpoint_io_list != NULL) goto out; - JBUFFER_TRACE(jh, "transaction has no more buffers"); /* * There is one special case to worry about: if we have just pulled the @@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) * The locking here around t_state is a bit sleazy. * See the comment at the end of jbd2_journal_commit_transaction(). 
*/ - if (transaction->t_state != T_FINISHED) { - JBUFFER_TRACE(jh, "belongs to running/committing transaction"); + if (transaction->t_state != T_FINISHED) goto out; - } /* OK, that was the last buffer for the transaction: we can now safely remove this transaction from the log */ @@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) wake_up(&journal->j_wait_logspace); ret = 1; out: - JBUFFER_TRACE(jh, "exit"); return ret; } @@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh, J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + /* Get reference for checkpointing transaction */ + jbd2_journal_grab_journal_head(jh2bh(jh)); jh->b_cp_transaction = transaction; if (!transaction->t_checkpoint_list) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7f21cf3aaf92..eef6979821a4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -848,10 +848,16 @@ restart_loop: while (commit_transaction->t_forget) { transaction_t *cp_transaction; struct buffer_head *bh; + int try_to_free = 0; jh = commit_transaction->t_forget; spin_unlock(&journal->j_list_lock); bh = jh2bh(jh); + /* + * Get a reference so that bh cannot be freed before we are + * done with it. + */ + get_bh(bh); jbd_lock_bh_state(bh); J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); @@ -914,28 +920,27 @@ restart_loop: __jbd2_journal_insert_checkpoint(jh, commit_transaction); if (is_journal_aborted(journal)) clear_buffer_jbddirty(bh); - JBUFFER_TRACE(jh, "refile for checkpoint writeback"); - __jbd2_journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); } else { J_ASSERT_BH(bh, !buffer_dirty(bh)); - /* The buffer on BJ_Forget list and not jbddirty means + /* + * The buffer on BJ_Forget list and not jbddirty means * it has been freed by this transaction and hence it * could not have been reallocated until this * transaction has committed. 
*BUT* it could be * reallocated once we have written all the data to * disk and before we process the buffer on BJ_Forget - * list. */ - JBUFFER_TRACE(jh, "refile or unfile freed buffer"); - __jbd2_journal_refile_buffer(jh); - if (!jh->b_transaction) { - jbd_unlock_bh_state(bh); - /* needs a brelse */ - jbd2_journal_remove_journal_head(bh); - release_buffer_page(bh); - } else - jbd_unlock_bh_state(bh); + * list. + */ + if (!jh->b_next_transaction) + try_to_free = 1; } + JBUFFER_TRACE(jh, "refile or unfile buffer"); + __jbd2_journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); + if (try_to_free) + release_buffer_page(bh); /* Drops bh reference */ + else + __brelse(bh); cond_resched_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9a7826990304..0dfa5b598e68 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh) * When a buffer has its BH_JBD bit set it is immune from being released by * core kernel code, mainly via ->b_count. * - * A journal_head may be detached from its buffer_head when the journal_head's - * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. - * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the - * journal_head can be dropped if needed. + * A journal_head is detached from its buffer_head when the journal_head's + * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint + * transaction (b_cp_transaction) hold their references to b_jcount. * * Various places in the kernel want to attach a journal_head to a buffer_head * _before_ attaching the journal_head to a transaction. To protect the @@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh) * (Attach a journal_head if needed. Increments b_jcount) * struct journal_head *jh = jbd2_journal_add_journal_head(bh); * ... 
+ * (Get another reference for transaction) + * jbd2_journal_grab_journal_head(bh); * jh->b_transaction = xxx; + * (Put original reference) * jbd2_journal_put_journal_head(jh); - * - * Now, the journal_head's b_jcount is zero, but it is safe from being released - * because it has a non-zero b_transaction. */ /* * Give a buffer_head a journal_head. * - * Doesn't need the journal lock. * May sleep. */ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) @@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) struct journal_head *jh = bh2jh(bh); J_ASSERT_JH(jh, jh->b_jcount >= 0); - - get_bh(bh); - if (jh->b_jcount == 0) { - if (jh->b_transaction == NULL && - jh->b_next_transaction == NULL && - jh->b_cp_transaction == NULL) { - J_ASSERT_JH(jh, jh->b_jlist == BJ_None); - J_ASSERT_BH(bh, buffer_jbd(bh)); - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); - if (jh->b_frozen_data) { - printk(KERN_WARNING "%s: freeing " - "b_frozen_data\n", - __func__); - jbd2_free(jh->b_frozen_data, bh->b_size); - } - if (jh->b_committed_data) { - printk(KERN_WARNING "%s: freeing " - "b_committed_data\n", - __func__); - jbd2_free(jh->b_committed_data, bh->b_size); - } - bh->b_private = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_buffer_jbd(bh); - __brelse(bh); - journal_free_journal_head(jh); - } else { - BUFFER_TRACE(bh, "journal_head was locked"); - } + J_ASSERT_JH(jh, jh->b_transaction == NULL); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + J_ASSERT_JH(jh, jh->b_jlist == BJ_None); + J_ASSERT_BH(bh, buffer_jbd(bh)); + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); + if (jh->b_frozen_data) { + printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); + jbd2_free(jh->b_frozen_data, bh->b_size); } + if (jh->b_committed_data) { + printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); + 
jbd2_free(jh->b_committed_data, bh->b_size); + } + bh->b_private = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_buffer_jbd(bh); + journal_free_journal_head(jh); } /* - * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction - * and has a zero b_jcount then remove and release its journal_head. If we did - * see that the buffer is not used by any transaction we also "logically" - * decrement ->b_count. - * - * We in fact take an additional increment on ->b_count as a convenience, - * because the caller usually wants to do additional things with the bh - * after calling here. - * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some - * time. Once the caller has run __brelse(), the buffer is eligible for - * reaping by try_to_free_buffers(). - */ -void jbd2_journal_remove_journal_head(struct buffer_head *bh) -{ - jbd_lock_bh_journal_head(bh); - __journal_remove_journal_head(bh); - jbd_unlock_bh_journal_head(bh); -} - -/* - * Drop a reference on the passed journal_head. If it fell to zero then try to + * Drop a reference on the passed journal_head. If it fell to zero then * release the journal_head from the buffer_head. 
*/ void jbd2_journal_put_journal_head(struct journal_head *jh) @@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) jbd_lock_bh_journal_head(bh); J_ASSERT_JH(jh, jh->b_jcount > 0); --jh->b_jcount; - if (!jh->b_jcount && !jh->b_transaction) { + if (!jh->b_jcount) { __journal_remove_journal_head(bh); + jbd_unlock_bh_journal_head(bh); __brelse(bh); - } - jbd_unlock_bh_journal_head(bh); + } else + jbd_unlock_bh_journal_head(bh); } /* diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 547b101049e5..2d7109414cdd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -30,6 +30,7 @@ #include static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); +static void __jbd2_journal_unfile_buffer(struct journal_head *jh); /* * jbd2_get_transaction: obtain a new transaction_t object. @@ -764,7 +765,6 @@ repeat: if (!jh->b_transaction) { JBUFFER_TRACE(jh, "no transaction"); J_ASSERT_JH(jh, !jh->b_next_transaction); - jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); spin_lock(&journal->j_list_lock); __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); @@ -895,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) * committed and so it's safe to clear the dirty bit. 
*/ clear_buffer_dirty(jh2bh(jh)); - jh->b_transaction = transaction; - /* first access by this transaction */ jh->b_modified = 0; @@ -1230,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); } else { __jbd2_journal_unfile_buffer(jh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); if (!buffer_jbd(bh)) { spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); @@ -1554,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) mark_buffer_dirty(bh); /* Expose it to the VM */ } -void __jbd2_journal_unfile_buffer(struct journal_head *jh) +/* + * Remove buffer from all transactions. + * + * Called with bh_state lock and j_list_lock + * + * jh and bh may be already freed when this function returns. + */ +static void __jbd2_journal_unfile_buffer(struct journal_head *jh) { __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = NULL; + jbd2_journal_put_journal_head(jh); } void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) { - jbd_lock_bh_state(jh2bh(jh)); + struct buffer_head *bh = jh2bh(jh); + + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); + jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); __jbd2_journal_unfile_buffer(jh); spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(jh2bh(jh)); + jbd_unlock_bh_state(bh); + __brelse(bh); } /* @@ -1593,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) if (jh->b_jlist == BJ_None) { JBUFFER_TRACE(jh, "remove from checkpoint list"); __jbd2_journal_remove_checkpoint(jh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); } } spin_unlock(&journal->j_list_lock); @@ -1657,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, /* * We take our own ref against the journal_head here to avoid * having to add tons of locking around each instance of - * jbd2_journal_remove_journal_head() and * 
jbd2_journal_put_journal_head(). */ jh = jbd2_journal_grab_journal_head(bh); @@ -1695,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) int may_free = 1; struct buffer_head *bh = jh2bh(jh); - __jbd2_journal_unfile_buffer(jh); - if (jh->b_cp_transaction) { JBUFFER_TRACE(jh, "on running+cp transaction"); + __jbd2_journal_temp_unlink_buffer(jh); /* * We don't want to write the buffer anymore, clear the * bit so that we don't confuse checks in @@ -1709,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) may_free = 0; } else { JBUFFER_TRACE(jh, "on running transaction"); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); + __jbd2_journal_unfile_buffer(jh); } return may_free; } @@ -1988,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, if (jh->b_transaction) __jbd2_journal_temp_unlink_buffer(jh); + else + jbd2_journal_grab_journal_head(bh); jh->b_transaction = transaction; switch (jlist) { @@ -2039,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh, * already started to be used by a subsequent transaction, refile the * buffer on that transaction's metadata list. * - * Called under journal->j_list_lock - * + * Called under j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)) + * + * jh and bh may be already free when this function returns */ void __jbd2_journal_refile_buffer(struct journal_head *jh) { @@ -2065,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) was_dirty = test_clear_buffer_jbddirty(bh); __jbd2_journal_temp_unlink_buffer(jh); + /* + * We set b_transaction here because b_next_transaction will inherit + * our jh reference and thus __jbd2_journal_file_buffer() must not + * take a new one. 
+ */ jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; if (buffer_freed(bh)) @@ -2081,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) } /* - * For the unlocked version of this call, also make sure that any - * hanging journal_head is cleaned up if necessary. - * - * __jbd2_journal_refile_buffer is usually called as part of a single locked - * operation on a buffer_head, in which the caller is probably going to - * be hooking the journal_head onto other lists. In that case it is up - * to the caller to remove the journal_head if necessary. For the - * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be - * doing anything else to the buffer so we need to do the cleanup - * ourselves to avoid a jh leak. - * - * *** The journal_head may be freed by this call! *** + * __jbd2_journal_refile_buffer() with necessary locking added. We take our + * bh reference so that we can safely unlock bh. + * + * The jh and bh may be freed by this call. 
*/ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) { struct buffer_head *bh = jh2bh(jh); + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); - __jbd2_journal_refile_buffer(jh); jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); - spin_unlock(&journal->j_list_lock); __brelse(bh); } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 4ecb7b16b278..d087c2e7b2aa 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1024,7 +1024,6 @@ struct journal_s /* Filing buffers */ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); -extern void __jbd2_journal_unfile_buffer(struct journal_head *); extern void __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); @@ -1165,7 +1164,6 @@ extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_in */ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh); struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh); -void jbd2_journal_remove_journal_head(struct buffer_head *bh); void jbd2_journal_put_journal_head(struct journal_head *jh); /* -- cgit v1.2.3