diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
| -rw-r--r-- | fs/btrfs/disk-io.c | 87 | 
1 files changed, 38 insertions, 49 deletions
| diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 05dc3c17cb62..3f0b6d1936e8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -125,8 +125,8 @@ struct async_submit_bio {   * Different roots are used for different purposes and may nest inside each   * other and they require separate keysets.  As lockdep keys should be   * static, assign keysets according to the purpose of the root as indicated - * by btrfs_root->objectid.  This ensures that all special purpose roots - * have separate keysets. + * by btrfs_root->root_key.objectid.  This ensures that all special purpose + * roots have separate keysets.   *   * Lock-nesting across peer nodes is always done with the immediate parent   * node locked thus preventing deadlock.  As lockdep doesn't know this, use @@ -1148,7 +1148,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,  	root->state = 0;  	root->orphan_cleanup_state = 0; -	root->objectid = objectid;  	root->last_trans = 0;  	root->highest_objectid = 0;  	root->nr_delalloc_inodes = 0; @@ -1665,9 +1664,8 @@ static int cleaner_kthread(void *arg)  	struct btrfs_root *root = arg;  	struct btrfs_fs_info *fs_info = root->fs_info;  	int again; -	struct btrfs_trans_handle *trans; -	do { +	while (1) {  		again = 0;  		/* Make the cleaner go to sleep early. */ @@ -1716,42 +1714,16 @@ static int cleaner_kthread(void *arg)  		 */  		btrfs_delete_unused_bgs(fs_info);  sleep: +		if (kthread_should_park()) +			kthread_parkme(); +		if (kthread_should_stop()) +			return 0;  		if (!again) {  			set_current_state(TASK_INTERRUPTIBLE); -			if (!kthread_should_stop()) -				schedule(); +			schedule();  			__set_current_state(TASK_RUNNING);  		} -	} while (!kthread_should_stop()); - -	/* -	 * Transaction kthread is stopped before us and wakes us up. -	 * However we might have started a new transaction and COWed some -	 * tree blocks when deleting unused block groups for example. So -	 * make sure we commit the transaction we started to have a clean -	 * shutdown when evicting the btree inode - if it has dirty pages -	 * when we do the final iput() on it, eviction will trigger a -	 * writeback for it which will fail with null pointer dereferences -	 * since work queues and other resources were already released and -	 * destroyed by the time the iput/eviction/writeback is made. -	 */ -	trans = btrfs_attach_transaction(root); -	if (IS_ERR(trans)) { -		if (PTR_ERR(trans) != -ENOENT) -			btrfs_err(fs_info, -				  "cleaner transaction attach returned %ld", -				  PTR_ERR(trans)); -	} else { -		int ret; - -		ret = btrfs_commit_transaction(trans); -		if (ret) -			btrfs_err(fs_info, -				  "cleaner open transaction commit returned %d", -				  ret);  	} - -	return 0;  }  static int transaction_kthread(void *arg) @@ -2156,9 +2128,8 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)  {  	mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);  	rwlock_init(&fs_info->dev_replace.lock); -	atomic_set(&fs_info->dev_replace.read_locks, 0);  	atomic_set(&fs_info->dev_replace.blocking_readers, 0); -	init_waitqueue_head(&fs_info->replace_wait); +	init_waitqueue_head(&fs_info->dev_replace.replace_wait);  	init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);  } @@ -2648,7 +2619,8 @@ int open_ctree(struct super_block *sb,  		goto fail_dirty_metadata_bytes;  	} -	ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL); +	ret = percpu_counter_init(&fs_info->dev_replace.bio_counter, 0, +			GFP_KERNEL);  	if (ret) {  		err = ret;  		goto fail_delalloc_bytes; @@ -3309,7 +3281,7 @@ fail_iput:  	iput(fs_info->btree_inode);  fail_bio_counter: -	percpu_counter_destroy(&fs_info->bio_counter); +	percpu_counter_destroy(&fs_info->dev_replace.bio_counter);  fail_delalloc_bytes:  	percpu_counter_destroy(&fs_info->delalloc_bytes);  fail_dirty_metadata_bytes: @@ -3932,6 +3904,13 @@ void close_ctree(struct btrfs_fs_info *fs_info)  	int ret;  	set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); +	/* +	 * We don't want the cleaner to start new transactions, add more delayed +	 * iputs, etc. while we're closing. We can't use kthread_stop() yet +	 * because that frees the task_struct, and the transaction kthread might +	 * still try to wake up the cleaner. +	 */ +	kthread_park(fs_info->cleaner_kthread);  	/* wait for the qgroup rescan worker to stop */  	btrfs_qgroup_wait_for_completion(fs_info, false); @@ -3959,9 +3938,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)  	if (!sb_rdonly(fs_info->sb)) {  		/* -		 * If the cleaner thread is stopped and there are -		 * block groups queued for removal, the deletion will be -		 * skipped when we quit the cleaner thread. +		 * The cleaner kthread is stopped, so do one final pass over +		 * unused block groups.  		 */  		btrfs_delete_unused_bgs(fs_info); @@ -3977,6 +3955,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)  	kthread_stop(fs_info->transaction_kthread);  	kthread_stop(fs_info->cleaner_kthread); +	ASSERT(list_empty(&fs_info->delayed_iputs));  	set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);  	btrfs_free_qgroup_config(fs_info); @@ -4018,7 +3997,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)  	percpu_counter_destroy(&fs_info->dirty_metadata_bytes);  	percpu_counter_destroy(&fs_info->delalloc_bytes); -	percpu_counter_destroy(&fs_info->bio_counter); +	percpu_counter_destroy(&fs_info->dev_replace.bio_counter);  	cleanup_srcu_struct(&fs_info->subvol_srcu);  	btrfs_free_stripe_hash_table(fs_info); @@ -4204,7 +4183,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,  		return ret;  	} -	while ((node = rb_first(&delayed_refs->href_root)) != NULL) { +	while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {  		struct btrfs_delayed_ref_head *head;  		struct rb_node *n;  		bool pin_bytes = false; @@ -4222,11 +4201,11 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,  			continue;  		}  		spin_lock(&head->lock); -		while ((n = rb_first(&head->ref_tree)) != NULL) { +		while ((n = rb_first_cached(&head->ref_tree)) != NULL) {  			ref = rb_entry(n, struct btrfs_delayed_ref_node,  				       ref_node);  			ref->in_tree = 0; -			rb_erase(&ref->ref_node, &head->ref_tree); +			rb_erase_cached(&ref->ref_node, &head->ref_tree);  			RB_CLEAR_NODE(&ref->ref_node);  			if (!list_empty(&ref->add_list))  				list_del(&ref->add_list); @@ -4240,7 +4219,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,  		if (head->processing == 0)  			delayed_refs->num_heads_ready--;  		atomic_dec(&delayed_refs->num_entries); -		rb_erase(&head->href_node, &delayed_refs->href_root); +		rb_erase_cached(&head->href_node, &delayed_refs->href_root);  		RB_CLEAR_NODE(&head->href_node);  		spin_unlock(&head->lock);  		spin_unlock(&delayed_refs->lock); @@ -4359,13 +4338,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,  	unpin = pinned_extents;  again:  	while (1) { +		/* +		 * The btrfs_finish_extent_commit() may get the same range as +		 * ours between find_first_extent_bit and clear_extent_dirty. +		 * Hence, hold the unused_bg_unpin_mutex to avoid double unpin +		 * the same extent range. +		 */ +		mutex_lock(&fs_info->unused_bg_unpin_mutex);  		ret = find_first_extent_bit(unpin, 0, &start, &end,  					    EXTENT_DIRTY, NULL); -		if (ret) +		if (ret) { +			mutex_unlock(&fs_info->unused_bg_unpin_mutex);  			break; +		}  		clear_extent_dirty(unpin, start, end);  		btrfs_error_unpin_extent_range(fs_info, start, end); +		mutex_unlock(&fs_info->unused_bg_unpin_mutex);  		cond_resched();  	} | 
