diff options
Diffstat (limited to 'fs/btrfs/raid56.c')
| -rw-r--r-- | fs/btrfs/raid56.c | 119 | 
1 files changed, 81 insertions, 38 deletions
| diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index a7f79254ecca..dec0907dfb8a 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -231,7 +231,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)  		cur = h + i;  		INIT_LIST_HEAD(&cur->hash_list);  		spin_lock_init(&cur->lock); -		init_waitqueue_head(&cur->wait);  	}  	x = cmpxchg(&info->stripe_hash_table, NULL, table); @@ -595,14 +594,31 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,  	 * bio list here, anyone else that wants to  	 * change this stripe needs to do their own rmw.  	 */ -	if (last->operation == BTRFS_RBIO_PARITY_SCRUB || -	    cur->operation == BTRFS_RBIO_PARITY_SCRUB) +	if (last->operation == BTRFS_RBIO_PARITY_SCRUB)  		return 0; -	if (last->operation == BTRFS_RBIO_REBUILD_MISSING || -	    cur->operation == BTRFS_RBIO_REBUILD_MISSING) +	if (last->operation == BTRFS_RBIO_REBUILD_MISSING)  		return 0; +	if (last->operation == BTRFS_RBIO_READ_REBUILD) { +		int fa = last->faila; +		int fb = last->failb; +		int cur_fa = cur->faila; +		int cur_fb = cur->failb; + +		if (last->faila >= last->failb) { +			fa = last->failb; +			fb = last->faila; +		} + +		if (cur->faila >= cur->failb) { +			cur_fa = cur->failb; +			cur_fb = cur->faila; +		} + +		if (fa != cur_fa || fb != cur_fb) +			return 0; +	}  	return 1;  } @@ -670,7 +686,6 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)  	struct btrfs_raid_bio *cur;  	struct btrfs_raid_bio *pending;  	unsigned long flags; -	DEFINE_WAIT(wait);  	struct btrfs_raid_bio *freeit = NULL;  	struct btrfs_raid_bio *cache_drop = NULL;  	int ret = 0; @@ -816,15 +831,6 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)  			}  			goto done_nolock; -			/* -			 * The barrier for this waitqueue_active is not needed, -			 * we're protected by h->lock and can't miss a wakeup. 
-			 */ -		} else if (waitqueue_active(&h->wait)) { -			spin_unlock(&rbio->bio_list_lock); -			spin_unlock_irqrestore(&h->lock, flags); -			wake_up(&h->wait); -			goto done_nolock;  		}  	}  done: @@ -858,10 +864,17 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)  	kfree(rbio);  } -static void free_raid_bio(struct btrfs_raid_bio *rbio) +static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)  { -	unlock_stripe(rbio); -	__free_raid_bio(rbio); +	struct bio *next; + +	while (cur) { +		next = cur->bi_next; +		cur->bi_next = NULL; +		cur->bi_status = err; +		bio_endio(cur); +		cur = next; +	}  }  /* @@ -871,20 +884,26 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)  static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)  {  	struct bio *cur = bio_list_get(&rbio->bio_list); -	struct bio *next; +	struct bio *extra;  	if (rbio->generic_bio_cnt)  		btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt); -	free_raid_bio(rbio); +	/* +	 * At this moment, rbio->bio_list is empty, however since rbio does not +	 * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the +	 * hash list, rbio may be merged with others so that rbio->bio_list +	 * becomes non-empty. +	 * Once unlock_stripe() is done, rbio->bio_list will not be updated any +	 * more and we can call bio_endio() on all queued bios. 
+	 */ +	unlock_stripe(rbio); +	extra = bio_list_get(&rbio->bio_list); +	__free_raid_bio(rbio); -	while (cur) { -		next = cur->bi_next; -		cur->bi_next = NULL; -		cur->bi_status = err; -		bio_endio(cur); -		cur = next; -	} +	rbio_endio_bio_list(cur, err); +	if (extra) +		rbio_endio_bio_list(extra, err);  }  /* @@ -1435,14 +1454,13 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio,   */  static void set_bio_pages_uptodate(struct bio *bio)  { -	struct bio_vec bvec; -	struct bvec_iter iter; +	struct bio_vec *bvec; +	int i; -	if (bio_flagged(bio, BIO_CLONED)) -		bio->bi_iter = btrfs_io_bio(bio)->iter; +	ASSERT(!bio_flagged(bio, BIO_CLONED)); -	bio_for_each_segment(bvec, bio, iter) -		SetPageUptodate(bvec.bv_page); +	bio_for_each_segment_all(bvec, bio, i) +		SetPageUptodate(bvec->bv_page);  }  /* @@ -1969,7 +1987,22 @@ cleanup:  cleanup_io:  	if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { -		if (err == BLK_STS_OK) +		/* +		 * - In case of two failures, where rbio->failb != -1: +		 * +		 *   Do not cache this rbio since the above read reconstruction +		 *   (raid6_datap_recov() or raid6_2data_recov()) may have +		 *   changed some content of stripes which are not identical to +		 *   on-disk content any more, otherwise, a later write/recover +		 *   may steal stripe_pages from this rbio and end up with +		 *   corruptions or rebuild failures. +		 * +		 * - In case of single failure, where rbio->failb == -1: +		 * +		 *   Cache this rbio iff the above read reconstruction is +		 *   executed without problems. +		 */ +		if (err == BLK_STS_OK && rbio->failb < 0)  			cache_rbio_pages(rbio);  		else  			clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); @@ -2170,11 +2203,21 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,  	}  	/* -	 * reconstruct from the q stripe if they are -	 * asking for mirror 3 +	 * Loop retry: +	 * for 'mirror == 2', reconstruct from all other stripes. +	 * for 'mirror_num > 2', select a stripe to fail on every retry.  	
 */ -	if (mirror_num == 3) -		rbio->failb = rbio->real_stripes - 2; +	if (mirror_num > 2) { +		/* +		 * 'mirror == 3' is to fail the p stripe and +		 * reconstruct from the q stripe.  'mirror > 3' is to +		 * fail a data stripe and reconstruct from p+q stripe. +		 */ +		rbio->failb = rbio->real_stripes - (mirror_num - 1); +		ASSERT(rbio->failb > 0); +		if (rbio->failb <= rbio->faila) +			rbio->failb--; +	}  	ret = lock_stripe_add(rbio); | 
