diff options
Diffstat (limited to 'fs/ext4/extents.c')
| -rw-r--r-- | fs/ext4/extents.c | 595 | 
1 files changed, 288 insertions, 307 deletions
| diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 72a361d5ef74..240b6dea5441 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2351,8 +2351,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,  {  	struct extent_status es; -	ext4_es_find_delayed_extent_range(inode, hole_start, -					  hole_start + hole_len - 1, &es); +	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, +				  hole_start + hole_len - 1, &es);  	if (es.es_len) {  		/* There's delayed extent containing lblock? */  		if (es.es_lblk <= hole_start) @@ -2490,106 +2490,157 @@ static inline int get_default_free_blocks_flags(struct inode *inode)  	return 0;  } +/* + * ext4_rereserve_cluster - increment the reserved cluster count when + *                          freeing a cluster with a pending reservation + * + * @inode - file containing the cluster + * @lblk - logical block in cluster to be reserved + * + * Increments the reserved cluster count and adjusts quota in a bigalloc + * file system when freeing a partial cluster containing at least one + * delayed and unwritten block.  A partial cluster meeting that + * requirement will have a pending reservation.  If so, the + * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to + * defer reserved and allocated space accounting to a subsequent call + * to this function. + */ +static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk) +{ +	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); +	struct ext4_inode_info *ei = EXT4_I(inode); + +	dquot_reclaim_block(inode, EXT4_C2B(sbi, 1)); + +	spin_lock(&ei->i_block_reservation_lock); +	ei->i_reserved_data_blocks++; +	percpu_counter_add(&sbi->s_dirtyclusters_counter, 1); +	spin_unlock(&ei->i_block_reservation_lock); + +	percpu_counter_add(&sbi->s_freeclusters_counter, 1); +	ext4_remove_pending(inode, lblk); +} +  static int ext4_remove_blocks(handle_t *handle, struct inode *inode,  			      struct ext4_extent *ex, -			      long long *partial_cluster, +			      struct partial_cluster *partial,  			      ext4_lblk_t from, ext4_lblk_t to)  {  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);  	unsigned short ee_len = ext4_ext_get_actual_len(ex); -	ext4_fsblk_t pblk; -	int flags = get_default_free_blocks_flags(inode); +	ext4_fsblk_t last_pblk, pblk; +	ext4_lblk_t num; +	int flags; + +	/* only extent tail removal is allowed */ +	if (from < le32_to_cpu(ex->ee_block) || +	    to != le32_to_cpu(ex->ee_block) + ee_len - 1) { +		ext4_error(sbi->s_sb, +			   "strange request: removal(2) %u-%u from %u:%u", +			   from, to, le32_to_cpu(ex->ee_block), ee_len); +		return 0; +	} + +#ifdef EXTENTS_STATS +	spin_lock(&sbi->s_ext_stats_lock); +	sbi->s_ext_blocks += ee_len; +	sbi->s_ext_extents++; +	if (ee_len < sbi->s_ext_min) +		sbi->s_ext_min = ee_len; +	if (ee_len > sbi->s_ext_max) +		sbi->s_ext_max = ee_len; +	if (ext_depth(inode) > sbi->s_depth_max) +		sbi->s_depth_max = ext_depth(inode); +	spin_unlock(&sbi->s_ext_stats_lock); +#endif + +	trace_ext4_remove_blocks(inode, ex, from, to, partial);  	/* -	 * For bigalloc file systems, we never free a partial cluster -	 * at the beginning of the extent.  Instead, we make a note -	 * that we tried freeing the cluster, and check to see if we -	 * need to free it on a subsequent call to ext4_remove_blocks, -	 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space. +	 * if we have a partial cluster, and it's different from the +	 * cluster of the last block in the extent, we free it  	 */ -	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER; +	last_pblk = ext4_ext_pblock(ex) + ee_len - 1; + +	if (partial->state != initial && +	    partial->pclu != EXT4_B2C(sbi, last_pblk)) { +		if (partial->state == tofree) { +			flags = get_default_free_blocks_flags(inode); +			if (ext4_is_pending(inode, partial->lblk)) +				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER; +			ext4_free_blocks(handle, inode, NULL, +					 EXT4_C2B(sbi, partial->pclu), +					 sbi->s_cluster_ratio, flags); +			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER) +				ext4_rereserve_cluster(inode, partial->lblk); +		} +		partial->state = initial; +	} + +	num = le32_to_cpu(ex->ee_block) + ee_len - from; +	pblk = ext4_ext_pblock(ex) + ee_len - num; -	trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);  	/* -	 * If we have a partial cluster, and it's different from the -	 * cluster of the last block, we need to explicitly free the -	 * partial cluster here. +	 * We free the partial cluster at the end of the extent (if any), +	 * unless the cluster is used by another extent (partial_cluster +	 * state is nofree).  If a partial cluster exists here, it must be +	 * shared with the last block in the extent.  	 */ -	pblk = ext4_ext_pblock(ex) + ee_len - 1; -	if (*partial_cluster > 0 && -	    *partial_cluster != (long long) EXT4_B2C(sbi, pblk)) { +	flags = get_default_free_blocks_flags(inode); + +	/* partial, left end cluster aligned, right end unaligned */ +	if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) && +	    (EXT4_LBLK_CMASK(sbi, to) >= from) && +	    (partial->state != nofree)) { +		if (ext4_is_pending(inode, to)) +			flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;  		ext4_free_blocks(handle, inode, NULL, -				 EXT4_C2B(sbi, *partial_cluster), +				 EXT4_PBLK_CMASK(sbi, last_pblk),  				 sbi->s_cluster_ratio, flags); -		*partial_cluster = 0; +		if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER) +			ext4_rereserve_cluster(inode, to); +		partial->state = initial; +		flags = get_default_free_blocks_flags(inode);  	} -#ifdef EXTENTS_STATS -	{ -		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -		spin_lock(&sbi->s_ext_stats_lock); -		sbi->s_ext_blocks += ee_len; -		sbi->s_ext_extents++; -		if (ee_len < sbi->s_ext_min) -			sbi->s_ext_min = ee_len; -		if (ee_len > sbi->s_ext_max) -			sbi->s_ext_max = ee_len; -		if (ext_depth(inode) > sbi->s_depth_max) -			sbi->s_depth_max = ext_depth(inode); -		spin_unlock(&sbi->s_ext_stats_lock); -	} -#endif -	if (from >= le32_to_cpu(ex->ee_block) -	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { -		/* tail removal */ -		ext4_lblk_t num; -		long long first_cluster; - -		num = le32_to_cpu(ex->ee_block) + ee_len - from; -		pblk = ext4_ext_pblock(ex) + ee_len - num; -		/* -		 * Usually we want to free partial cluster at the end of the -		 * extent, except for the situation when the cluster is still -		 * used by any other extent (partial_cluster is negative). -		 */ -		if (*partial_cluster < 0 && -		    *partial_cluster == -(long long) EXT4_B2C(sbi, pblk+num-1)) -			flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; +	flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; -		ext_debug("free last %u blocks starting %llu partial %lld\n", -			  num, pblk, *partial_cluster); -		ext4_free_blocks(handle, inode, NULL, pblk, num, flags); -		/* -		 * If the block range to be freed didn't start at the -		 * beginning of a cluster, and we removed the entire -		 * extent and the cluster is not used by any other extent, -		 * save the partial cluster here, since we might need to -		 * delete if we determine that the truncate or punch hole -		 * operation has removed all of the blocks in the cluster. -		 * If that cluster is used by another extent, preserve its -		 * negative value so it isn't freed later on. -		 * -		 * If the whole extent wasn't freed, we've reached the -		 * start of the truncated/punched region and have finished -		 * removing blocks.  If there's a partial cluster here it's -		 * shared with the remainder of the extent and is no longer -		 * a candidate for removal. -		 */ -		if (EXT4_PBLK_COFF(sbi, pblk) && ee_len == num) { -			first_cluster = (long long) EXT4_B2C(sbi, pblk); -			if (first_cluster != -*partial_cluster) -				*partial_cluster = first_cluster; -		} else { -			*partial_cluster = 0; +	/* +	 * For bigalloc file systems, we never free a partial cluster +	 * at the beginning of the extent.  Instead, we check to see if we +	 * need to free it on a subsequent call to ext4_remove_blocks, +	 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space. +	 */ +	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER; +	ext4_free_blocks(handle, inode, NULL, pblk, num, flags); + +	/* reset the partial cluster if we've freed past it */ +	if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk)) +		partial->state = initial; + +	/* +	 * If we've freed the entire extent but the beginning is not left +	 * cluster aligned and is not marked as ineligible for freeing we +	 * record the partial cluster at the beginning of the extent.  It +	 * wasn't freed by the preceding ext4_free_blocks() call, and we +	 * need to look farther to the left to determine if it's to be freed +	 * (not shared with another extent). Else, reset the partial +	 * cluster - we're either  done freeing or the beginning of the +	 * extent is left cluster aligned. +	 */ +	if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) { +		if (partial->state == initial) { +			partial->pclu = EXT4_B2C(sbi, pblk); +			partial->lblk = from; +			partial->state = tofree;  		} -	} else -		ext4_error(sbi->s_sb, "strange request: removal(2) " -			   "%u-%u from %u:%u", -			   from, to, le32_to_cpu(ex->ee_block), ee_len); +	} else { +		partial->state = initial; +	} +  	return 0;  } -  /*   * ext4_ext_rm_leaf() Removes the extents associated with the   * blocks appearing between "start" and "end".  Both "start" @@ -2608,7 +2659,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,  static int  ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  		 struct ext4_ext_path *path, -		 long long *partial_cluster, +		 struct partial_cluster *partial,  		 ext4_lblk_t start, ext4_lblk_t end)  {  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -2640,7 +2691,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  	ex_ee_block = le32_to_cpu(ex->ee_block);  	ex_ee_len = ext4_ext_get_actual_len(ex); -	trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); +	trace_ext4_ext_rm_leaf(inode, start, ex, partial);  	while (ex >= EXT_FIRST_EXTENT(eh) &&  			ex_ee_block + ex_ee_len > start) { @@ -2671,8 +2722,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  			 */  			if (sbi->s_cluster_ratio > 1) {  				pblk = ext4_ext_pblock(ex); -				*partial_cluster = -					-(long long) EXT4_B2C(sbi, pblk); +				partial->pclu = EXT4_B2C(sbi, pblk); +				partial->state = nofree;  			}  			ex--;  			ex_ee_block = le32_to_cpu(ex->ee_block); @@ -2714,8 +2765,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  		if (err)  			goto out; -		err = ext4_remove_blocks(handle, inode, ex, partial_cluster, -					 a, b); +		err = ext4_remove_blocks(handle, inode, ex, partial, a, b);  		if (err)  			goto out; @@ -2769,18 +2819,23 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  	 * If there's a partial cluster and at least one extent remains in  	 * the leaf, free the partial cluster if it isn't shared with the  	 * current extent.  If it is shared with the current extent -	 * we zero partial_cluster because we've reached the start of the +	 * we reset the partial cluster because we've reached the start of the  	 * truncated/punched region and we're done removing blocks.  	 */ -	if (*partial_cluster > 0 && ex >= EXT_FIRST_EXTENT(eh)) { +	if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {  		pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; -		if (*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) { +		if (partial->pclu != EXT4_B2C(sbi, pblk)) { +			int flags = get_default_free_blocks_flags(inode); + +			if (ext4_is_pending(inode, partial->lblk)) +				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;  			ext4_free_blocks(handle, inode, NULL, -					 EXT4_C2B(sbi, *partial_cluster), -					 sbi->s_cluster_ratio, -					 get_default_free_blocks_flags(inode)); +					 EXT4_C2B(sbi, partial->pclu), +					 sbi->s_cluster_ratio, flags); +			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER) +				ext4_rereserve_cluster(inode, partial->lblk);  		} -		*partial_cluster = 0; +		partial->state = initial;  	}  	/* if this leaf is free, then we should @@ -2819,10 +2874,14 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);  	int depth = ext_depth(inode);  	struct ext4_ext_path *path = NULL; -	long long partial_cluster = 0; +	struct partial_cluster partial;  	handle_t *handle;  	int i = 0, err = 0; +	partial.pclu = 0; +	partial.lblk = 0; +	partial.state = initial; +  	ext_debug("truncate since %u to %u\n", start, end);  	/* probably first extent we're gonna free will be last in block */ @@ -2882,8 +2941,8 @@ again:  			 */  			if (sbi->s_cluster_ratio > 1) {  				pblk = ext4_ext_pblock(ex) + end - ee_block + 2; -				partial_cluster = -					-(long long) EXT4_B2C(sbi, pblk); +				partial.pclu = EXT4_B2C(sbi, pblk); +				partial.state = nofree;  			}  			/* @@ -2911,9 +2970,10 @@ again:  						    &ex);  			if (err)  				goto out; -			if (pblk) -				partial_cluster = -					-(long long) EXT4_B2C(sbi, pblk); +			if (pblk) { +				partial.pclu = EXT4_B2C(sbi, pblk); +				partial.state = nofree; +			}  		}  	}  	/* @@ -2948,8 +3008,7 @@ again:  		if (i == depth) {  			/* this is leaf block */  			err = ext4_ext_rm_leaf(handle, inode, path, -					       &partial_cluster, start, -					       end); +					       &partial, start, end);  			/* root level has p_bh == NULL, brelse() eats this */  			brelse(path[i].p_bh);  			path[i].p_bh = NULL; @@ -3021,21 +3080,24 @@ again:  		}  	} -	trace_ext4_ext_remove_space_done(inode, start, end, depth, -			partial_cluster, path->p_hdr->eh_entries); +	trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial, +					 path->p_hdr->eh_entries);  	/* -	 * If we still have something in the partial cluster and we have removed -	 * even the first extent, then we should free the blocks in the partial -	 * cluster as well.  (This code will only run when there are no leaves -	 * to the immediate left of the truncated/punched region.) +	 * if there's a partial cluster and we have removed the first extent +	 * in the file, then we also free the partial cluster, if any  	 */ -	if (partial_cluster > 0 && err == 0) { -		/* don't zero partial_cluster since it's not used afterwards */ +	if (partial.state == tofree && err == 0) { +		int flags = get_default_free_blocks_flags(inode); + +		if (ext4_is_pending(inode, partial.lblk)) +			flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;  		ext4_free_blocks(handle, inode, NULL, -				 EXT4_C2B(sbi, partial_cluster), -				 sbi->s_cluster_ratio, -				 get_default_free_blocks_flags(inode)); +				 EXT4_C2B(sbi, partial.pclu), +				 sbi->s_cluster_ratio, flags); +		if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER) +			ext4_rereserve_cluster(inode, partial.lblk); +		partial.state = initial;  	}  	/* TODO: flexible tree reduction should be here */ @@ -3819,114 +3881,6 @@ out:  	return ext4_mark_inode_dirty(handle, inode);  } -/** - * ext4_find_delalloc_range: find delayed allocated block in the given range. - * - * Return 1 if there is a delalloc block in the range, otherwise 0. - */ -int ext4_find_delalloc_range(struct inode *inode, -			     ext4_lblk_t lblk_start, -			     ext4_lblk_t lblk_end) -{ -	struct extent_status es; - -	ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es); -	if (es.es_len == 0) -		return 0; /* there is no delay extent in this tree */ -	else if (es.es_lblk <= lblk_start && -		 lblk_start < es.es_lblk + es.es_len) -		return 1; -	else if (lblk_start <= es.es_lblk && es.es_lblk <= lblk_end) -		return 1; -	else -		return 0; -} - -int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk) -{ -	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -	ext4_lblk_t lblk_start, lblk_end; -	lblk_start = EXT4_LBLK_CMASK(sbi, lblk); -	lblk_end = lblk_start + sbi->s_cluster_ratio - 1; - -	return ext4_find_delalloc_range(inode, lblk_start, lblk_end); -} - -/** - * Determines how many complete clusters (out of those specified by the 'map') - * are under delalloc and were reserved quota for. - * This function is called when we are writing out the blocks that were - * originally written with their allocation delayed, but then the space was - * allocated using fallocate() before the delayed allocation could be resolved. - * The cases to look for are: - * ('=' indicated delayed allocated blocks - *  '-' indicates non-delayed allocated blocks) - * (a) partial clusters towards beginning and/or end outside of allocated range - *     are not delalloc'ed. - *	Ex: - *	|----c---=|====c====|====c====|===-c----| - *	         |++++++ allocated ++++++| - *	==> 4 complete clusters in above example - * - * (b) partial cluster (outside of allocated range) towards either end is - *     marked for delayed allocation. In this case, we will exclude that - *     cluster. - *	Ex: - *	|----====c========|========c========| - *	     |++++++ allocated ++++++| - *	==> 1 complete clusters in above example - * - *	Ex: - *	|================c================| - *            |++++++ allocated ++++++| - *	==> 0 complete clusters in above example - * - * The ext4_da_update_reserve_space will be called only if we - * determine here that there were some "entire" clusters that span - * this 'allocated' range. - * In the non-bigalloc case, this function will just end up returning num_blks - * without ever calling ext4_find_delalloc_range. - */ -static unsigned int -get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, -			   unsigned int num_blks) -{ -	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -	ext4_lblk_t alloc_cluster_start, alloc_cluster_end; -	ext4_lblk_t lblk_from, lblk_to, c_offset; -	unsigned int allocated_clusters = 0; - -	alloc_cluster_start = EXT4_B2C(sbi, lblk_start); -	alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1); - -	/* max possible clusters for this allocation */ -	allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1; - -	trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); - -	/* Check towards left side */ -	c_offset = EXT4_LBLK_COFF(sbi, lblk_start); -	if (c_offset) { -		lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start); -		lblk_to = lblk_from + c_offset - 1; - -		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) -			allocated_clusters--; -	} - -	/* Now check towards right. */ -	c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks); -	if (allocated_clusters && c_offset) { -		lblk_from = lblk_start + num_blks; -		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; - -		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) -			allocated_clusters--; -	} - -	return allocated_clusters; -} -  static int  convert_initialized_extent(handle_t *handle, struct inode *inode,  			   struct ext4_map_blocks *map, @@ -4108,23 +4062,6 @@ out:  	}  	map->m_len = allocated; -	/* -	 * If we have done fallocate with the offset that is already -	 * delayed allocated, we would have block reservation -	 * and quota reservation done in the delayed write path. -	 * But fallocate would have already updated quota and block -	 * count for this offset. So cancel these reservation -	 */ -	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { -		unsigned int reserved_clusters; -		reserved_clusters = get_reserved_cluster_alloc(inode, -				map->m_lblk, map->m_len); -		if (reserved_clusters) -			ext4_da_update_reserve_space(inode, -						     reserved_clusters, -						     0); -	} -  map_out:  	map->m_flags |= EXT4_MAP_MAPPED;  	if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) { @@ -4513,77 +4450,39 @@ got_allocated_blocks:  	map->m_flags |= EXT4_MAP_NEW;  	/* -	 * Update reserved blocks/metadata blocks after successful -	 * block allocation which had been deferred till now. +	 * Reduce the reserved cluster count to reflect successful deferred +	 * allocation of delayed allocated clusters or direct allocation of +	 * clusters discovered to be delayed allocated.  Once allocated, a +	 * cluster is not included in the reserved count.  	 */ -	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { -		unsigned int reserved_clusters; -		/* -		 * Check how many clusters we had reserved this allocated range -		 */ -		reserved_clusters = get_reserved_cluster_alloc(inode, -						map->m_lblk, allocated); -		if (!map_from_cluster) { -			BUG_ON(allocated_clusters < reserved_clusters); -			if (reserved_clusters < allocated_clusters) { -				struct ext4_inode_info *ei = EXT4_I(inode); -				int reservation = allocated_clusters - -						  reserved_clusters; -				/* -				 * It seems we claimed few clusters outside of -				 * the range of this allocation. We should give -				 * it back to the reservation pool. This can -				 * happen in the following case: -				 * -				 * * Suppose s_cluster_ratio is 4 (i.e., each -				 *   cluster has 4 blocks. Thus, the clusters -				 *   are [0-3],[4-7],[8-11]... -				 * * First comes delayed allocation write for -				 *   logical blocks 10 & 11. Since there were no -				 *   previous delayed allocated blocks in the -				 *   range [8-11], we would reserve 1 cluster -				 *   for this write. -				 * * Next comes write for logical blocks 3 to 8. -				 *   In this case, we will reserve 2 clusters -				 *   (for [0-3] and [4-7]; and not for [8-11] as -				 *   that range has a delayed allocated blocks. -				 *   Thus total reserved clusters now becomes 3. -				 * * Now, during the delayed allocation writeout -				 *   time, we will first write blocks [3-8] and -				 *   allocate 3 clusters for writing these -				 *   blocks. Also, we would claim all these -				 *   three clusters above. -				 * * Now when we come here to writeout the -				 *   blocks [10-11], we would expect to claim -				 *   the reservation of 1 cluster we had made -				 *   (and we would claim it since there are no -				 *   more delayed allocated blocks in the range -				 *   [8-11]. But our reserved cluster count had -				 *   already gone to 0. -				 * -				 *   Thus, at the step 4 above when we determine -				 *   that there are still some unwritten delayed -				 *   allocated blocks outside of our current -				 *   block range, we should increment the -				 *   reserved clusters count so that when the -				 *   remaining blocks finally gets written, we -				 *   could claim them. -				 */ -				dquot_reserve_block(inode, -						EXT4_C2B(sbi, reservation)); -				spin_lock(&ei->i_block_reservation_lock); -				ei->i_reserved_data_blocks += reservation; -				spin_unlock(&ei->i_block_reservation_lock); -			} +	if (test_opt(inode->i_sb, DELALLOC) && !map_from_cluster) { +		if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {  			/* -			 * We will claim quota for all newly allocated blocks. -			 * We're updating the reserved space *after* the -			 * correction above so we do not accidentally free -			 * all the metadata reservation because we might -			 * actually need it later on. +			 * When allocating delayed allocated clusters, simply +			 * reduce the reserved cluster count and claim quota  			 */  			ext4_da_update_reserve_space(inode, allocated_clusters,  							1); +		} else { +			ext4_lblk_t lblk, len; +			unsigned int n; + +			/* +			 * When allocating non-delayed allocated clusters +			 * (from fallocate, filemap, DIO, or clusters +			 * allocated when delalloc has been disabled by +			 * ext4_nonda_switch), reduce the reserved cluster +			 * count by the number of allocated clusters that +			 * have previously been delayed allocated.  Quota +			 * has been claimed by ext4_mb_new_blocks() above, +			 * so release the quota reservations made for any +			 * previously delayed allocated clusters. +			 */ +			lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk); +			len = allocated_clusters << sbi->s_cluster_bits; +			n = ext4_es_delayed_clu(inode, lblk, len); +			if (n > 0) +				ext4_da_update_reserve_space(inode, (int) n, 0);  		}  	} @@ -5075,8 +4974,10 @@ static int ext4_find_delayed_extent(struct inode *inode,  	ext4_lblk_t block, next_del;  	if (newes->es_pblk == 0) { -		ext4_es_find_delayed_extent_range(inode, newes->es_lblk, -				newes->es_lblk + newes->es_len - 1, &es); +		ext4_es_find_extent_range(inode, &ext4_es_is_delayed, +					  newes->es_lblk, +					  newes->es_lblk + newes->es_len - 1, +					  &es);  		/*  		 * No extent in extent-tree contains block @newes->es_pblk, @@ -5097,7 +4998,8 @@ static int ext4_find_delayed_extent(struct inode *inode,  	}  	block = newes->es_lblk + newes->es_len; -	ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es); +	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, block, +				  EXT_MAX_BLOCKS, &es);  	if (es.es_len == 0)  		next_del = EXT_MAX_BLOCKS;  	else @@ -5958,3 +5860,82 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,  	}  	return replaced_count;  } + +/* + * ext4_clu_mapped - determine whether any block in a logical cluster has + *                   been mapped to a physical cluster + * + * @inode - file containing the logical cluster + * @lclu - logical cluster of interest + * + * Returns 1 if any block in the logical cluster is mapped, signifying + * that a physical cluster has been allocated for it.  Otherwise, + * returns 0.  Can also return negative error codes.  Derived from + * ext4_ext_map_blocks(). + */ +int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu) +{ +	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); +	struct ext4_ext_path *path; +	int depth, mapped = 0, err = 0; +	struct ext4_extent *extent; +	ext4_lblk_t first_lblk, first_lclu, last_lclu; + +	/* search for the extent closest to the first block in the cluster */ +	path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0); +	if (IS_ERR(path)) { +		err = PTR_ERR(path); +		path = NULL; +		goto out; +	} + +	depth = ext_depth(inode); + +	/* +	 * A consistent leaf must not be empty.  This situation is possible, +	 * though, _during_ tree modification, and it's why an assert can't +	 * be put in ext4_find_extent(). +	 */ +	if (unlikely(path[depth].p_ext == NULL && depth != 0)) { +		EXT4_ERROR_INODE(inode, +		    "bad extent address - lblock: %lu, depth: %d, pblock: %lld", +				 (unsigned long) EXT4_C2B(sbi, lclu), +				 depth, path[depth].p_block); +		err = -EFSCORRUPTED; +		goto out; +	} + +	extent = path[depth].p_ext; + +	/* can't be mapped if the extent tree is empty */ +	if (extent == NULL) +		goto out; + +	first_lblk = le32_to_cpu(extent->ee_block); +	first_lclu = EXT4_B2C(sbi, first_lblk); + +	/* +	 * Three possible outcomes at this point - found extent spanning +	 * the target cluster, to the left of the target cluster, or to the +	 * right of the target cluster.  The first two cases are handled here. +	 * The last case indicates the target cluster is not mapped. +	 */ +	if (lclu >= first_lclu) { +		last_lclu = EXT4_B2C(sbi, first_lblk + +				     ext4_ext_get_actual_len(extent) - 1); +		if (lclu <= last_lclu) { +			mapped = 1; +		} else { +			first_lblk = ext4_ext_next_allocated_block(path); +			first_lclu = EXT4_B2C(sbi, first_lblk); +			if (lclu == first_lclu) +				mapped = 1; +		} +	} + +out: +	ext4_ext_drop_refs(path); +	kfree(path); + +	return err ? err : mapped; +} | 
