diff options
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
| -rw-r--r-- | fs/xfs/xfs_log_recover.c | 114 | 
1 files changed, 105 insertions, 9 deletions
| diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 93f03ec17eec..7cf5e4eafe28 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1599,10 +1599,43 @@ xlog_recover_add_to_trans(  }  /* - * Sort the log items in the transaction. Cancelled buffers need - * to be put first so they are processed before any items that might - * modify the buffers. If they are cancelled, then the modifications - * don't need to be replayed. + * Sort the log items in the transaction. + * + * The ordering constraints are defined by the inode allocation and unlink + * behaviour. The rules are: + * + *	1. Every item is only logged once in a given transaction. Hence it + *	   represents the last logged state of the item. Hence ordering is + *	   dependent on the order in which operations need to be performed so + *	   required initial conditions are always met. + * + *	2. Cancelled buffers are recorded in pass 1 in a separate table and + *	   there's nothing to replay from them so we can simply cull them + *	   from the transaction. However, we can't do that until after we've + *	   replayed all the other items because they may be dependent on the + *	   cancelled buffer and replaying the cancelled buffer can remove it + *	   from the cancelled buffer table. Hence they have to be done last. + * + *	3. Inode allocation buffers must be replayed before inode items that + *	   read the buffer and replay changes into it. + * + *	4. Inode unlink buffers must be replayed after inode items are replayed. + *	   This ensures that inodes are completely flushed to the inode buffer + *	   in a "free" state before we remove the unlinked inode list pointer. + * + * Hence the ordering needs to be inode allocation buffers first, inode items + * second, inode unlink buffers third and cancelled buffers last. + * + * But there's a problem with that - we can't tell an inode allocation buffer + * apart from a regular buffer, so we can't separate them. 
We can, however, + * tell an inode unlink buffer from the others, and so we can separate them out + * from all the other buffers and move them to last. + * + * Hence, 4 lists, in order from head to tail: + * 	- buffer_list for all buffers except cancelled/inode unlink buffers + * 	- inode_list for all non-buffer items + * 	- inode_buffer_list for inode unlink buffers + * 	- cancel_list for the cancelled buffers   */  STATIC int  xlog_recover_reorder_trans( @@ -1612,6 +1645,10 @@ xlog_recover_reorder_trans(  {  	xlog_recover_item_t	*item, *n;  	LIST_HEAD(sort_list); +	LIST_HEAD(cancel_list); +	LIST_HEAD(buffer_list); +	LIST_HEAD(inode_buffer_list); +	LIST_HEAD(inode_list);  	list_splice_init(&trans->r_itemq, &sort_list);  	list_for_each_entry_safe(item, n, &sort_list, ri_list) { @@ -1619,12 +1656,18 @@ xlog_recover_reorder_trans(  		switch (ITEM_TYPE(item)) {  		case XFS_LI_BUF: -			if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { +			if (buf_f->blf_flags & XFS_BLF_CANCEL) {  				trace_xfs_log_recover_item_reorder_head(log,  							trans, item, pass); -				list_move(&item->ri_list, &trans->r_itemq); +				list_move(&item->ri_list, &cancel_list); +				break; +			} +			if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { +				list_move(&item->ri_list, &inode_buffer_list);  				break;  			} +			list_move_tail(&item->ri_list, &buffer_list); +			break;  		case XFS_LI_INODE:  		case XFS_LI_DQUOT:  		case XFS_LI_QUOTAOFF: @@ -1632,7 +1675,7 @@ xlog_recover_reorder_trans(  		case XFS_LI_EFI:  			trace_xfs_log_recover_item_reorder_tail(log,  							trans, item, pass); -			list_move_tail(&item->ri_list, &trans->r_itemq); +			list_move_tail(&item->ri_list, &inode_list);  			break;  		default:  			xfs_warn(log->l_mp, @@ -1643,6 +1686,14 @@ xlog_recover_reorder_trans(  		}  	}  	ASSERT(list_empty(&sort_list)); +	if (!list_empty(&buffer_list)) +		list_splice(&buffer_list, &trans->r_itemq); +	if (!list_empty(&inode_list)) +		list_splice_tail(&inode_list, &trans->r_itemq); +	if 
(!list_empty(&inode_buffer_list)) +		list_splice_tail(&inode_buffer_list, &trans->r_itemq); +	if (!list_empty(&cancel_list)) +		list_splice_tail(&cancel_list, &trans->r_itemq);  	return 0;  } @@ -1794,7 +1845,13 @@ xlog_recover_do_inode_buffer(  	xfs_agino_t		*buffer_nextp;  	trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); -	bp->b_ops = &xfs_inode_buf_ops; + +	/* +	 * Post recovery validation only works properly on CRC enabled +	 * filesystems. +	 */ +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		bp->b_ops = &xfs_inode_buf_ops;  	inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;  	for (i = 0; i < inodes_per_buf; i++) { @@ -1861,6 +1918,15 @@ xlog_recover_do_inode_buffer(  		buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,  					      next_unlinked_offset);  		*buffer_nextp = *logged_nextp; + +		/* +		 * If necessary, recalculate the CRC in the on-disk inode. We +		 * have to leave the inode in a consistent state for whoever +		 * reads it next.... +		 */ +		xfs_dinode_calc_crc(mp, (struct xfs_dinode *) +				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); +  	}  	return 0; @@ -2097,6 +2163,17 @@ xlog_recover_do_reg_buffer(  		       ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));  		/* +		 * The dirty regions logged in the buffer, even though +		 * contiguous, may span multiple chunks. This is because the +		 * dirty region may span a physical page boundary in a buffer +		 * and hence be split into two separate vectors for writing into +		 * the log. Hence we need to trim nbits back to the length of +		 * the current region being copied out of the log. +		 */ +		if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT)) +			nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT; + +		/*  		 * Do a sanity check if this is a dquot buffer. Just checking  		 * the first dquot in the buffer should do. XXXThis is  		 * probably a good thing to do for other buf types also. 
@@ -2134,7 +2211,16 @@ xlog_recover_do_reg_buffer(  	/* Shouldn't be any more regions */  	ASSERT(i == item->ri_total); -	xlog_recovery_validate_buf_type(mp, bp, buf_f); +	/* +	 * We can only do post recovery validation on items on CRC enabled +	 * filesystems as we need to know when the buffer was written to be able +	 * to determine if we should have replayed the item. If we replay old +	 * metadata over a newer buffer, then it will enter a temporarily +	 * inconsistent state resulting in verification failures. Hence for now +	 * just avoid the verification stage for non-crc filesystems +	 */ +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		xlog_recovery_validate_buf_type(mp, bp, buf_f);  }  /* @@ -2255,6 +2341,12 @@ xfs_qm_dqcheck(  	d->dd_diskdq.d_flags = type;  	d->dd_diskdq.d_id = cpu_to_be32(id); +	if (xfs_sb_version_hascrc(&mp->m_sb)) { +		uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); +		xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), +				 XFS_DQUOT_CRC_OFF); +	} +  	return errs;  } @@ -2782,6 +2874,10 @@ xlog_recover_dquot_pass2(  	}  	memcpy(ddq, recddq, item->ri_buf[1].i_len); +	if (xfs_sb_version_hascrc(&mp->m_sb)) { +		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), +				 XFS_DQUOT_CRC_OFF); +	}  	ASSERT(dq_f->qlf_size == 2);  	ASSERT(bp->b_target->bt_mount == mp); | 
