diff options
| author | Thomas Gleixner <tglx@linutronix.de> | 2018-06-22 21:20:35 +0200 | 
|---|---|---|
| committer | Thomas Gleixner <tglx@linutronix.de> | 2018-06-22 21:20:35 +0200 | 
| commit | 7731b8bc94e599c9a79e428f3359ff2c34b7576a (patch) | |
| tree | 879f18ccbe274122f2d4f095b43cbc7f953e0ada /fs/btrfs/ioctl.c | |
| parent | 48e315618dc4dc8904182cd221e3d395d5d97005 (diff) | |
| parent | 9ffc59d57228d74809700be6f7ecb1db10292f05 (diff) | |
Merge branch 'linus' into x86/urgent
Required to queue a dependent fix.
Diffstat (limited to 'fs/btrfs/ioctl.c')
| -rw-r--r-- | fs/btrfs/ioctl.c | 1140 | 
1 file changed, 774 insertions, 366 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 632e26d6f7ce..c2837a32d689 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -93,20 +93,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  		       int no_time_update);  /* Mask out flags that are inappropriate for the given type of inode. */ -static unsigned int btrfs_mask_flags(umode_t mode, unsigned int flags) +static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode, +		unsigned int flags)  { -	if (S_ISDIR(mode)) +	if (S_ISDIR(inode->i_mode))  		return flags; -	else if (S_ISREG(mode)) +	else if (S_ISREG(inode->i_mode))  		return flags & ~FS_DIRSYNC_FL;  	else  		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);  }  /* - * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl. + * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS + * ioctl.   */ -static unsigned int btrfs_flags_to_ioctl(unsigned int flags) +static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)  {  	unsigned int iflags = 0; @@ -136,20 +138,20 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)  /*   * Update inode->i_flags based on the btrfs internal flags.   
*/ -void btrfs_update_iflags(struct inode *inode) +void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)  { -	struct btrfs_inode *ip = BTRFS_I(inode); +	struct btrfs_inode *binode = BTRFS_I(inode);  	unsigned int new_fl = 0; -	if (ip->flags & BTRFS_INODE_SYNC) +	if (binode->flags & BTRFS_INODE_SYNC)  		new_fl |= S_SYNC; -	if (ip->flags & BTRFS_INODE_IMMUTABLE) +	if (binode->flags & BTRFS_INODE_IMMUTABLE)  		new_fl |= S_IMMUTABLE; -	if (ip->flags & BTRFS_INODE_APPEND) +	if (binode->flags & BTRFS_INODE_APPEND)  		new_fl |= S_APPEND; -	if (ip->flags & BTRFS_INODE_NOATIME) +	if (binode->flags & BTRFS_INODE_NOATIME)  		new_fl |= S_NOATIME; -	if (ip->flags & BTRFS_INODE_DIRSYNC) +	if (binode->flags & BTRFS_INODE_DIRSYNC)  		new_fl |= S_DIRSYNC;  	set_mask_bits(&inode->i_flags, @@ -159,15 +161,16 @@ void btrfs_update_iflags(struct inode *inode)  static int btrfs_ioctl_getflags(struct file *file, void __user *arg)  { -	struct btrfs_inode *ip = BTRFS_I(file_inode(file)); -	unsigned int flags = btrfs_flags_to_ioctl(ip->flags); +	struct btrfs_inode *binode = BTRFS_I(file_inode(file)); +	unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);  	if (copy_to_user(arg, &flags, sizeof(flags)))  		return -EFAULT;  	return 0;  } -static int check_flags(unsigned int flags) +/* Check if @flags are a supported and valid set of FS_*_FL flags */ +static int check_fsflags(unsigned int flags)  {  	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \  		      FS_NOATIME_FL | FS_NODUMP_FL | \ @@ -186,13 +189,13 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  {  	struct inode *inode = file_inode(file);  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); -	struct btrfs_inode *ip = BTRFS_I(inode); -	struct btrfs_root *root = ip->root; +	struct btrfs_inode *binode = BTRFS_I(inode); +	struct btrfs_root *root = binode->root;  	struct btrfs_trans_handle *trans; -	unsigned int flags, oldflags; +	unsigned int fsflags, old_fsflags;  	int ret; -	u64 
ip_oldflags; -	unsigned int i_oldflags; +	u64 old_flags; +	unsigned int old_i_flags;  	umode_t mode;  	if (!inode_owner_or_capable(inode)) @@ -201,10 +204,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  	if (btrfs_root_readonly(root))  		return -EROFS; -	if (copy_from_user(&flags, arg, sizeof(flags))) +	if (copy_from_user(&fsflags, arg, sizeof(fsflags)))  		return -EFAULT; -	ret = check_flags(flags); +	ret = check_fsflags(fsflags);  	if (ret)  		return ret; @@ -214,44 +217,44 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  	inode_lock(inode); -	ip_oldflags = ip->flags; -	i_oldflags = inode->i_flags; +	old_flags = binode->flags; +	old_i_flags = inode->i_flags;  	mode = inode->i_mode; -	flags = btrfs_mask_flags(inode->i_mode, flags); -	oldflags = btrfs_flags_to_ioctl(ip->flags); -	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { +	fsflags = btrfs_mask_fsflags_for_type(inode, fsflags); +	old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags); +	if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {  		if (!capable(CAP_LINUX_IMMUTABLE)) {  			ret = -EPERM;  			goto out_unlock;  		}  	} -	if (flags & FS_SYNC_FL) -		ip->flags |= BTRFS_INODE_SYNC; +	if (fsflags & FS_SYNC_FL) +		binode->flags |= BTRFS_INODE_SYNC;  	else -		ip->flags &= ~BTRFS_INODE_SYNC; -	if (flags & FS_IMMUTABLE_FL) -		ip->flags |= BTRFS_INODE_IMMUTABLE; +		binode->flags &= ~BTRFS_INODE_SYNC; +	if (fsflags & FS_IMMUTABLE_FL) +		binode->flags |= BTRFS_INODE_IMMUTABLE;  	else -		ip->flags &= ~BTRFS_INODE_IMMUTABLE; -	if (flags & FS_APPEND_FL) -		ip->flags |= BTRFS_INODE_APPEND; +		binode->flags &= ~BTRFS_INODE_IMMUTABLE; +	if (fsflags & FS_APPEND_FL) +		binode->flags |= BTRFS_INODE_APPEND;  	else -		ip->flags &= ~BTRFS_INODE_APPEND; -	if (flags & FS_NODUMP_FL) -		ip->flags |= BTRFS_INODE_NODUMP; +		binode->flags &= ~BTRFS_INODE_APPEND; +	if (fsflags & FS_NODUMP_FL) +		binode->flags |= BTRFS_INODE_NODUMP;  	else -		ip->flags &= 
~BTRFS_INODE_NODUMP; -	if (flags & FS_NOATIME_FL) -		ip->flags |= BTRFS_INODE_NOATIME; +		binode->flags &= ~BTRFS_INODE_NODUMP; +	if (fsflags & FS_NOATIME_FL) +		binode->flags |= BTRFS_INODE_NOATIME;  	else -		ip->flags &= ~BTRFS_INODE_NOATIME; -	if (flags & FS_DIRSYNC_FL) -		ip->flags |= BTRFS_INODE_DIRSYNC; +		binode->flags &= ~BTRFS_INODE_NOATIME; +	if (fsflags & FS_DIRSYNC_FL) +		binode->flags |= BTRFS_INODE_DIRSYNC;  	else -		ip->flags &= ~BTRFS_INODE_DIRSYNC; -	if (flags & FS_NOCOW_FL) { +		binode->flags &= ~BTRFS_INODE_DIRSYNC; +	if (fsflags & FS_NOCOW_FL) {  		if (S_ISREG(mode)) {  			/*  			 * It's safe to turn csums off here, no extents exist. @@ -259,10 +262,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  			 * status of the file and will not set it.  			 */  			if (inode->i_size == 0) -				ip->flags |= BTRFS_INODE_NODATACOW -					   | BTRFS_INODE_NODATASUM; +				binode->flags |= BTRFS_INODE_NODATACOW +					      | BTRFS_INODE_NODATASUM;  		} else { -			ip->flags |= BTRFS_INODE_NODATACOW; +			binode->flags |= BTRFS_INODE_NODATACOW;  		}  	} else {  		/* @@ -270,10 +273,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  		 */  		if (S_ISREG(mode)) {  			if (inode->i_size == 0) -				ip->flags &= ~(BTRFS_INODE_NODATACOW +				binode->flags &= ~(BTRFS_INODE_NODATACOW  				             | BTRFS_INODE_NODATASUM);  		} else { -			ip->flags &= ~BTRFS_INODE_NODATACOW; +			binode->flags &= ~BTRFS_INODE_NODATACOW;  		}  	} @@ -282,18 +285,18 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  	 * flag may be changed automatically if compression code won't make  	 * things smaller.  	 
*/ -	if (flags & FS_NOCOMP_FL) { -		ip->flags &= ~BTRFS_INODE_COMPRESS; -		ip->flags |= BTRFS_INODE_NOCOMPRESS; +	if (fsflags & FS_NOCOMP_FL) { +		binode->flags &= ~BTRFS_INODE_COMPRESS; +		binode->flags |= BTRFS_INODE_NOCOMPRESS;  		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);  		if (ret && ret != -ENODATA)  			goto out_drop; -	} else if (flags & FS_COMPR_FL) { +	} else if (fsflags & FS_COMPR_FL) {  		const char *comp; -		ip->flags |= BTRFS_INODE_COMPRESS; -		ip->flags &= ~BTRFS_INODE_NOCOMPRESS; +		binode->flags |= BTRFS_INODE_COMPRESS; +		binode->flags &= ~BTRFS_INODE_NOCOMPRESS;  		comp = btrfs_compress_type2str(fs_info->compress_type);  		if (!comp || comp[0] == 0) @@ -308,7 +311,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);  		if (ret && ret != -ENODATA)  			goto out_drop; -		ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); +		binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);  	}  	trans = btrfs_start_transaction(root, 1); @@ -317,7 +320,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  		goto out_drop;  	} -	btrfs_update_iflags(inode); +	btrfs_sync_inode_flags_to_i_flags(inode);  	inode_inc_iversion(inode);  	inode->i_ctime = current_time(inode);  	ret = btrfs_update_inode(trans, root, inode); @@ -325,8 +328,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  	btrfs_end_transaction(trans);   out_drop:  	if (ret) { -		ip->flags = ip_oldflags; -		inode->i_flags = i_oldflags; +		binode->flags = old_flags; +		inode->i_flags = old_i_flags;  	}   out_unlock: @@ -335,6 +338,148 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  	return ret;  } +/* + * Translate btrfs internal inode flags to xflags as expected by the + * FS_IOC_FSGETXATT ioctl. Filter only the supported ones, unknown flags are + * silently dropped. 
+ */ +static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags) +{ +	unsigned int xflags = 0; + +	if (flags & BTRFS_INODE_APPEND) +		xflags |= FS_XFLAG_APPEND; +	if (flags & BTRFS_INODE_IMMUTABLE) +		xflags |= FS_XFLAG_IMMUTABLE; +	if (flags & BTRFS_INODE_NOATIME) +		xflags |= FS_XFLAG_NOATIME; +	if (flags & BTRFS_INODE_NODUMP) +		xflags |= FS_XFLAG_NODUMP; +	if (flags & BTRFS_INODE_SYNC) +		xflags |= FS_XFLAG_SYNC; + +	return xflags; +} + +/* Check if @flags are a supported and valid set of FS_XFLAGS_* flags */ +static int check_xflags(unsigned int flags) +{ +	if (flags & ~(FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE | FS_XFLAG_NOATIME | +		      FS_XFLAG_NODUMP | FS_XFLAG_SYNC)) +		return -EOPNOTSUPP; +	return 0; +} + +/* + * Set the xflags from the internal inode flags. The remaining items of fsxattr + * are zeroed. + */ +static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg) +{ +	struct btrfs_inode *binode = BTRFS_I(file_inode(file)); +	struct fsxattr fa; + +	memset(&fa, 0, sizeof(fa)); +	fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags); + +	if (copy_to_user(arg, &fa, sizeof(fa))) +		return -EFAULT; + +	return 0; +} + +static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg) +{ +	struct inode *inode = file_inode(file); +	struct btrfs_inode *binode = BTRFS_I(inode); +	struct btrfs_root *root = binode->root; +	struct btrfs_trans_handle *trans; +	struct fsxattr fa; +	unsigned old_flags; +	unsigned old_i_flags; +	int ret = 0; + +	if (!inode_owner_or_capable(inode)) +		return -EPERM; + +	if (btrfs_root_readonly(root)) +		return -EROFS; + +	memset(&fa, 0, sizeof(fa)); +	if (copy_from_user(&fa, arg, sizeof(fa))) +		return -EFAULT; + +	ret = check_xflags(fa.fsx_xflags); +	if (ret) +		return ret; + +	if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0) +		return -EOPNOTSUPP; + +	ret = mnt_want_write_file(file); +	if (ret) +		return ret; + +	inode_lock(inode); + +	old_flags = binode->flags; +	old_i_flags = 
inode->i_flags; + +	/* We need the capabilities to change append-only or immutable inode */ +	if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) || +	     (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) && +	    !capable(CAP_LINUX_IMMUTABLE)) { +		ret = -EPERM; +		goto out_unlock; +	} + +	if (fa.fsx_xflags & FS_XFLAG_SYNC) +		binode->flags |= BTRFS_INODE_SYNC; +	else +		binode->flags &= ~BTRFS_INODE_SYNC; +	if (fa.fsx_xflags & FS_XFLAG_IMMUTABLE) +		binode->flags |= BTRFS_INODE_IMMUTABLE; +	else +		binode->flags &= ~BTRFS_INODE_IMMUTABLE; +	if (fa.fsx_xflags & FS_XFLAG_APPEND) +		binode->flags |= BTRFS_INODE_APPEND; +	else +		binode->flags &= ~BTRFS_INODE_APPEND; +	if (fa.fsx_xflags & FS_XFLAG_NODUMP) +		binode->flags |= BTRFS_INODE_NODUMP; +	else +		binode->flags &= ~BTRFS_INODE_NODUMP; +	if (fa.fsx_xflags & FS_XFLAG_NOATIME) +		binode->flags |= BTRFS_INODE_NOATIME; +	else +		binode->flags &= ~BTRFS_INODE_NOATIME; + +	/* 1 item for the inode */ +	trans = btrfs_start_transaction(root, 1); +	if (IS_ERR(trans)) { +		ret = PTR_ERR(trans); +		goto out_unlock; +	} + +	btrfs_sync_inode_flags_to_i_flags(inode); +	inode_inc_iversion(inode); +	inode->i_ctime = current_time(inode); +	ret = btrfs_update_inode(trans, root, inode); + +	btrfs_end_transaction(trans); + +out_unlock: +	if (ret) { +		binode->flags = old_flags; +		inode->i_flags = old_i_flags; +	} + +	inode_unlock(inode); +	mnt_drop_write_file(file); + +	return ret; +} +  static int btrfs_ioctl_getversion(struct file *file, int __user *arg)  {  	struct inode *inode = file_inode(file); @@ -417,14 +562,13 @@ static noinline int create_subvol(struct inode *dir,  	struct btrfs_root *root = BTRFS_I(dir)->root;  	struct btrfs_root *new_root;  	struct btrfs_block_rsv block_rsv; -	struct timespec cur_time = current_time(dir); +	struct timespec64 cur_time = current_time(dir);  	struct inode *inode;  	int ret;  	int err;  	u64 objectid;  	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;  	u64 index = 0; -	u64 
qgroup_reserved;  	uuid_le new_uuid;  	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL); @@ -449,8 +593,7 @@ static noinline int create_subvol(struct inode *dir,  	 * The same as the snapshot creation, please see the comment  	 * of create_snapshot().  	 */ -	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, -					       8, &qgroup_reserved, false); +	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);  	if (ret)  		goto fail_free; @@ -573,7 +716,7 @@ static noinline int create_subvol(struct inode *dir,  				 btrfs_ino(BTRFS_I(dir)), index, name, namelen);  	BUG_ON(ret); -	ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid, +	ret = btrfs_uuid_tree_add(trans, root_item->uuid,  				  BTRFS_UUID_KEY_SUBVOL, objectid);  	if (ret)  		btrfs_abort_transaction(trans, ret); @@ -640,7 +783,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,  	wait_event(root->subv_writers->wait,  		   percpu_counter_sum(&root->subv_writers->counter) == 0); -	ret = btrfs_start_delalloc_inodes(root, 0); +	ret = btrfs_start_delalloc_inodes(root);  	if (ret)  		goto dec_and_free; @@ -658,7 +801,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,  	 */  	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,  					&pending_snapshot->block_rsv, 8, -					&pending_snapshot->qgroup_reserved,  					false);  	if (ret)  		goto dec_and_free; @@ -1457,7 +1599,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,  		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;  	} -	mutex_lock(&fs_info->volume_mutex);  	vol_args = memdup_user(arg, sizeof(*vol_args));  	if (IS_ERR(vol_args)) {  		ret = PTR_ERR(vol_args); @@ -1565,7 +1706,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,  out_free:  	kfree(vol_args);  out: -	mutex_unlock(&fs_info->volume_mutex);  	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);  	mnt_drop_write_file(file);  	return ret; @@ -1832,60 +1972,6 @@ out:  	return ret;  } -/* - * helper to check 
if the subvolume references other subvolumes - */ -static noinline int may_destroy_subvol(struct btrfs_root *root) -{ -	struct btrfs_fs_info *fs_info = root->fs_info; -	struct btrfs_path *path; -	struct btrfs_dir_item *di; -	struct btrfs_key key; -	u64 dir_id; -	int ret; - -	path = btrfs_alloc_path(); -	if (!path) -		return -ENOMEM; - -	/* Make sure this root isn't set as the default subvol */ -	dir_id = btrfs_super_root_dir(fs_info->super_copy); -	di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path, -				   dir_id, "default", 7, 0); -	if (di && !IS_ERR(di)) { -		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); -		if (key.objectid == root->root_key.objectid) { -			ret = -EPERM; -			btrfs_err(fs_info, -				  "deleting default subvolume %llu is not allowed", -				  key.objectid); -			goto out; -		} -		btrfs_release_path(path); -	} - -	key.objectid = root->root_key.objectid; -	key.type = BTRFS_ROOT_REF_KEY; -	key.offset = (u64)-1; - -	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); -	if (ret < 0) -		goto out; -	BUG_ON(ret == 0); - -	ret = 0; -	if (path->slots[0] > 0) { -		path->slots[0]--; -		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); -		if (key.objectid == root->root_key.objectid && -		    key.type == BTRFS_ROOT_REF_KEY) -			ret = -ENOTEMPTY; -	} -out: -	btrfs_free_path(path); -	return ret; -} -  static noinline int key_in_sk(struct btrfs_key *key,  			      struct btrfs_ioctl_search_key *sk)  { @@ -2066,7 +2152,7 @@ static noinline int search_ioctl(struct inode *inode,  		root = btrfs_read_fs_root_no_name(info, &key);  		if (IS_ERR(root)) {  			btrfs_free_path(path); -			return -ENOENT; +			return PTR_ERR(root);  		}  	} @@ -2200,8 +2286,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,  	key.offset = (u64)-1;  	root = btrfs_read_fs_root_no_name(info, &key);  	if (IS_ERR(root)) { -		btrfs_err(info, "could not find root %llu", tree_id); -		ret = -ENOENT; +		ret = PTR_ERR(root);  		goto out; 
 	} @@ -2256,6 +2341,169 @@ out:  	return ret;  } +static int btrfs_search_path_in_tree_user(struct inode *inode, +				struct btrfs_ioctl_ino_lookup_user_args *args) +{ +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; +	struct super_block *sb = inode->i_sb; +	struct btrfs_key upper_limit = BTRFS_I(inode)->location; +	u64 treeid = BTRFS_I(inode)->root->root_key.objectid; +	u64 dirid = args->dirid; +	unsigned long item_off; +	unsigned long item_len; +	struct btrfs_inode_ref *iref; +	struct btrfs_root_ref *rref; +	struct btrfs_root *root; +	struct btrfs_path *path; +	struct btrfs_key key, key2; +	struct extent_buffer *leaf; +	struct inode *temp_inode; +	char *ptr; +	int slot; +	int len; +	int total_len = 0; +	int ret; + +	path = btrfs_alloc_path(); +	if (!path) +		return -ENOMEM; + +	/* +	 * If the bottom subvolume does not exist directly under upper_limit, +	 * construct the path in from the bottom up. +	 */ +	if (dirid != upper_limit.objectid) { +		ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1]; + +		key.objectid = treeid; +		key.type = BTRFS_ROOT_ITEM_KEY; +		key.offset = (u64)-1; +		root = btrfs_read_fs_root_no_name(fs_info, &key); +		if (IS_ERR(root)) { +			ret = PTR_ERR(root); +			goto out; +		} + +		key.objectid = dirid; +		key.type = BTRFS_INODE_REF_KEY; +		key.offset = (u64)-1; +		while (1) { +			ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +			if (ret < 0) { +				goto out; +			} else if (ret > 0) { +				ret = btrfs_previous_item(root, path, dirid, +							  BTRFS_INODE_REF_KEY); +				if (ret < 0) { +					goto out; +				} else if (ret > 0) { +					ret = -ENOENT; +					goto out; +				} +			} + +			leaf = path->nodes[0]; +			slot = path->slots[0]; +			btrfs_item_key_to_cpu(leaf, &key, slot); + +			iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref); +			len = btrfs_inode_ref_name_len(leaf, iref); +			ptr -= len + 1; +			total_len += len + 1; +			if (ptr < args->path) { +				ret = -ENAMETOOLONG; +				goto out; +			} + +			
*(ptr + len) = '/'; +			read_extent_buffer(leaf, ptr, +					(unsigned long)(iref + 1), len); + +			/* Check the read+exec permission of this directory */ +			ret = btrfs_previous_item(root, path, dirid, +						  BTRFS_INODE_ITEM_KEY); +			if (ret < 0) { +				goto out; +			} else if (ret > 0) { +				ret = -ENOENT; +				goto out; +			} + +			leaf = path->nodes[0]; +			slot = path->slots[0]; +			btrfs_item_key_to_cpu(leaf, &key2, slot); +			if (key2.objectid != dirid) { +				ret = -ENOENT; +				goto out; +			} + +			temp_inode = btrfs_iget(sb, &key2, root, NULL); +			if (IS_ERR(temp_inode)) { +				ret = PTR_ERR(temp_inode); +				goto out; +			} +			ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC); +			iput(temp_inode); +			if (ret) { +				ret = -EACCES; +				goto out; +			} + +			if (key.offset == upper_limit.objectid) +				break; +			if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) { +				ret = -EACCES; +				goto out; +			} + +			btrfs_release_path(path); +			key.objectid = key.offset; +			key.offset = (u64)-1; +			dirid = key.objectid; +		} + +		memmove(args->path, ptr, total_len); +		args->path[total_len] = '\0'; +		btrfs_release_path(path); +	} + +	/* Get the bottom subvolume's name from ROOT_REF */ +	root = fs_info->tree_root; +	key.objectid = treeid; +	key.type = BTRFS_ROOT_REF_KEY; +	key.offset = args->treeid; +	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +	if (ret < 0) { +		goto out; +	} else if (ret > 0) { +		ret = -ENOENT; +		goto out; +	} + +	leaf = path->nodes[0]; +	slot = path->slots[0]; +	btrfs_item_key_to_cpu(leaf, &key, slot); + +	item_off = btrfs_item_ptr_offset(leaf, slot); +	item_len = btrfs_item_size_nr(leaf, slot); +	/* Check if dirid in ROOT_REF corresponds to passed dirid */ +	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); +	if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) { +		ret = -EINVAL; +		goto out; +	} + +	/* Copy subvolume's name */ +	item_off += sizeof(struct btrfs_root_ref); +	item_len -= sizeof(struct 
btrfs_root_ref); +	read_extent_buffer(leaf, args->name, item_off, item_len); +	args->name[item_len] = 0; + +out: +	btrfs_free_path(path); +	return ret; +} +  static noinline int btrfs_ioctl_ino_lookup(struct file *file,  					   void __user *argp)  { @@ -2298,6 +2546,265 @@ out:  	return ret;  } +/* + * Version of ino_lookup ioctl (unprivileged) + * + * The main differences from ino_lookup ioctl are: + * + *   1. Read + Exec permission will be checked using inode_permission() during + *      path construction. -EACCES will be returned in case of failure. + *   2. Path construction will be stopped at the inode number which corresponds + *      to the fd with which this ioctl is called. If constructed path does not + *      exist under fd's inode, -EACCES will be returned. + *   3. The name of bottom subvolume is also searched and filled. + */ +static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp) +{ +	struct btrfs_ioctl_ino_lookup_user_args *args; +	struct inode *inode; +	int ret; + +	args = memdup_user(argp, sizeof(*args)); +	if (IS_ERR(args)) +		return PTR_ERR(args); + +	inode = file_inode(file); + +	if (args->dirid == BTRFS_FIRST_FREE_OBJECTID && +	    BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) { +		/* +		 * The subvolume does not exist under fd with which this is +		 * called +		 */ +		kfree(args); +		return -EACCES; +	} + +	ret = btrfs_search_path_in_tree_user(inode, args); + +	if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) +		ret = -EFAULT; + +	kfree(args); +	return ret; +} + +/* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */ +static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp) +{ +	struct btrfs_ioctl_get_subvol_info_args *subvol_info; +	struct btrfs_fs_info *fs_info; +	struct btrfs_root *root; +	struct btrfs_path *path; +	struct btrfs_key key; +	struct btrfs_root_item *root_item; +	struct btrfs_root_ref *rref; +	struct extent_buffer *leaf; +	unsigned 
long item_off; +	unsigned long item_len; +	struct inode *inode; +	int slot; +	int ret = 0; + +	path = btrfs_alloc_path(); +	if (!path) +		return -ENOMEM; + +	subvol_info = kzalloc(sizeof(*subvol_info), GFP_KERNEL); +	if (!subvol_info) { +		btrfs_free_path(path); +		return -ENOMEM; +	} + +	inode = file_inode(file); +	fs_info = BTRFS_I(inode)->root->fs_info; + +	/* Get root_item of inode's subvolume */ +	key.objectid = BTRFS_I(inode)->root->root_key.objectid; +	key.type = BTRFS_ROOT_ITEM_KEY; +	key.offset = (u64)-1; +	root = btrfs_read_fs_root_no_name(fs_info, &key); +	if (IS_ERR(root)) { +		ret = PTR_ERR(root); +		goto out; +	} +	root_item = &root->root_item; + +	subvol_info->treeid = key.objectid; + +	subvol_info->generation = btrfs_root_generation(root_item); +	subvol_info->flags = btrfs_root_flags(root_item); + +	memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE); +	memcpy(subvol_info->parent_uuid, root_item->parent_uuid, +						    BTRFS_UUID_SIZE); +	memcpy(subvol_info->received_uuid, root_item->received_uuid, +						    BTRFS_UUID_SIZE); + +	subvol_info->ctransid = btrfs_root_ctransid(root_item); +	subvol_info->ctime.sec = btrfs_stack_timespec_sec(&root_item->ctime); +	subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(&root_item->ctime); + +	subvol_info->otransid = btrfs_root_otransid(root_item); +	subvol_info->otime.sec = btrfs_stack_timespec_sec(&root_item->otime); +	subvol_info->otime.nsec = btrfs_stack_timespec_nsec(&root_item->otime); + +	subvol_info->stransid = btrfs_root_stransid(root_item); +	subvol_info->stime.sec = btrfs_stack_timespec_sec(&root_item->stime); +	subvol_info->stime.nsec = btrfs_stack_timespec_nsec(&root_item->stime); + +	subvol_info->rtransid = btrfs_root_rtransid(root_item); +	subvol_info->rtime.sec = btrfs_stack_timespec_sec(&root_item->rtime); +	subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(&root_item->rtime); + +	if (key.objectid != BTRFS_FS_TREE_OBJECTID) { +		/* Search root tree for ROOT_BACKREF of this subvolume 
*/ +		root = fs_info->tree_root; + +		key.type = BTRFS_ROOT_BACKREF_KEY; +		key.offset = 0; +		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +		if (ret < 0) { +			goto out; +		} else if (path->slots[0] >= +			   btrfs_header_nritems(path->nodes[0])) { +			ret = btrfs_next_leaf(root, path); +			if (ret < 0) { +				goto out; +			} else if (ret > 0) { +				ret = -EUCLEAN; +				goto out; +			} +		} + +		leaf = path->nodes[0]; +		slot = path->slots[0]; +		btrfs_item_key_to_cpu(leaf, &key, slot); +		if (key.objectid == subvol_info->treeid && +		    key.type == BTRFS_ROOT_BACKREF_KEY) { +			subvol_info->parent_id = key.offset; + +			rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); +			subvol_info->dirid = btrfs_root_ref_dirid(leaf, rref); + +			item_off = btrfs_item_ptr_offset(leaf, slot) +					+ sizeof(struct btrfs_root_ref); +			item_len = btrfs_item_size_nr(leaf, slot) +					- sizeof(struct btrfs_root_ref); +			read_extent_buffer(leaf, subvol_info->name, +					   item_off, item_len); +		} else { +			ret = -ENOENT; +			goto out; +		} +	} + +	if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) +		ret = -EFAULT; + +out: +	btrfs_free_path(path); +	kzfree(subvol_info); +	return ret; +} + +/* + * Return ROOT_REF information of the subvolume containing this inode + * except the subvolume name. 
+ */ +static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp) +{ +	struct btrfs_ioctl_get_subvol_rootref_args *rootrefs; +	struct btrfs_root_ref *rref; +	struct btrfs_root *root; +	struct btrfs_path *path; +	struct btrfs_key key; +	struct extent_buffer *leaf; +	struct inode *inode; +	u64 objectid; +	int slot; +	int ret; +	u8 found; + +	path = btrfs_alloc_path(); +	if (!path) +		return -ENOMEM; + +	rootrefs = memdup_user(argp, sizeof(*rootrefs)); +	if (IS_ERR(rootrefs)) { +		btrfs_free_path(path); +		return PTR_ERR(rootrefs); +	} + +	inode = file_inode(file); +	root = BTRFS_I(inode)->root->fs_info->tree_root; +	objectid = BTRFS_I(inode)->root->root_key.objectid; + +	key.objectid = objectid; +	key.type = BTRFS_ROOT_REF_KEY; +	key.offset = rootrefs->min_treeid; +	found = 0; + +	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +	if (ret < 0) { +		goto out; +	} else if (path->slots[0] >= +		   btrfs_header_nritems(path->nodes[0])) { +		ret = btrfs_next_leaf(root, path); +		if (ret < 0) { +			goto out; +		} else if (ret > 0) { +			ret = -EUCLEAN; +			goto out; +		} +	} +	while (1) { +		leaf = path->nodes[0]; +		slot = path->slots[0]; + +		btrfs_item_key_to_cpu(leaf, &key, slot); +		if (key.objectid != objectid || key.type != BTRFS_ROOT_REF_KEY) { +			ret = 0; +			goto out; +		} + +		if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) { +			ret = -EOVERFLOW; +			goto out; +		} + +		rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); +		rootrefs->rootref[found].treeid = key.offset; +		rootrefs->rootref[found].dirid = +				  btrfs_root_ref_dirid(leaf, rref); +		found++; + +		ret = btrfs_next_item(root, path); +		if (ret < 0) { +			goto out; +		} else if (ret > 0) { +			ret = -EUCLEAN; +			goto out; +		} +	} + +out: +	if (!ret || ret == -EOVERFLOW) { +		rootrefs->num_items = found; +		/* update min_treeid for next search */ +		if (found) +			rootrefs->min_treeid = +				rootrefs->rootref[found - 1].treeid + 1; +		if (copy_to_user(argp, rootrefs, 
sizeof(*rootrefs))) +			ret = -EFAULT; +	} + +	kfree(rootrefs); +	btrfs_free_path(path); + +	return ret; +} +  static noinline int btrfs_ioctl_snap_destroy(struct file *file,  					     void __user *arg)  { @@ -2309,12 +2816,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,  	struct btrfs_root *root = BTRFS_I(dir)->root;  	struct btrfs_root *dest = NULL;  	struct btrfs_ioctl_vol_args *vol_args; -	struct btrfs_trans_handle *trans; -	struct btrfs_block_rsv block_rsv; -	u64 root_flags; -	u64 qgroup_reserved;  	int namelen; -	int ret;  	int err = 0;  	if (!S_ISDIR(dir->i_mode)) @@ -2398,133 +2900,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,  	}  	inode_lock(inode); - -	/* -	 * Don't allow to delete a subvolume with send in progress. This is -	 * inside the i_mutex so the error handling that has to drop the bit -	 * again is not run concurrently. -	 */ -	spin_lock(&dest->root_item_lock); -	root_flags = btrfs_root_flags(&dest->root_item); -	if (dest->send_in_progress == 0) { -		btrfs_set_root_flags(&dest->root_item, -				root_flags | BTRFS_ROOT_SUBVOL_DEAD); -		spin_unlock(&dest->root_item_lock); -	} else { -		spin_unlock(&dest->root_item_lock); -		btrfs_warn(fs_info, -			   "Attempt to delete subvolume %llu during send", -			   dest->root_key.objectid); -		err = -EPERM; -		goto out_unlock_inode; -	} - -	down_write(&fs_info->subvol_sem); - -	err = may_destroy_subvol(dest); -	if (err) -		goto out_up_write; - -	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); -	/* -	 * One for dir inode, two for dir entries, two for root -	 * ref/backref. 
-	 */ -	err = btrfs_subvolume_reserve_metadata(root, &block_rsv, -					       5, &qgroup_reserved, true); -	if (err) -		goto out_up_write; - -	trans = btrfs_start_transaction(root, 0); -	if (IS_ERR(trans)) { -		err = PTR_ERR(trans); -		goto out_release; -	} -	trans->block_rsv = &block_rsv; -	trans->bytes_reserved = block_rsv.size; - -	btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); - -	ret = btrfs_unlink_subvol(trans, root, dir, -				dest->root_key.objectid, -				dentry->d_name.name, -				dentry->d_name.len); -	if (ret) { -		err = ret; -		btrfs_abort_transaction(trans, ret); -		goto out_end_trans; -	} - -	btrfs_record_root_in_trans(trans, dest); - -	memset(&dest->root_item.drop_progress, 0, -		sizeof(dest->root_item.drop_progress)); -	dest->root_item.drop_level = 0; -	btrfs_set_root_refs(&dest->root_item, 0); - -	if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) { -		ret = btrfs_insert_orphan_item(trans, -					fs_info->tree_root, -					dest->root_key.objectid); -		if (ret) { -			btrfs_abort_transaction(trans, ret); -			err = ret; -			goto out_end_trans; -		} -	} - -	ret = btrfs_uuid_tree_rem(trans, fs_info, dest->root_item.uuid, -				  BTRFS_UUID_KEY_SUBVOL, -				  dest->root_key.objectid); -	if (ret && ret != -ENOENT) { -		btrfs_abort_transaction(trans, ret); -		err = ret; -		goto out_end_trans; -	} -	if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) { -		ret = btrfs_uuid_tree_rem(trans, fs_info, -					  dest->root_item.received_uuid, -					  BTRFS_UUID_KEY_RECEIVED_SUBVOL, -					  dest->root_key.objectid); -		if (ret && ret != -ENOENT) { -			btrfs_abort_transaction(trans, ret); -			err = ret; -			goto out_end_trans; -		} -	} - -out_end_trans: -	trans->block_rsv = NULL; -	trans->bytes_reserved = 0; -	ret = btrfs_end_transaction(trans); -	if (ret && !err) -		err = ret; -	inode->i_flags |= S_DEAD; -out_release: -	btrfs_subvolume_release_metadata(fs_info, &block_rsv); -out_up_write: -	up_write(&fs_info->subvol_sem); -	if (err) { -		
spin_lock(&dest->root_item_lock); -		root_flags = btrfs_root_flags(&dest->root_item); -		btrfs_set_root_flags(&dest->root_item, -				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); -		spin_unlock(&dest->root_item_lock); -	} -out_unlock_inode: +	err = btrfs_delete_subvolume(dir, dentry);  	inode_unlock(inode); -	if (!err) { -		d_invalidate(dentry); -		btrfs_invalidate_inodes(dest); +	if (!err)  		d_delete(dentry); -		ASSERT(dest->send_in_progress == 0); -		/* the last ref */ -		if (dest->ino_cache_inode) { -			iput(dest->ino_cache_inode); -			dest->ino_cache_inode = NULL; -		} -	}  out_dput:  	dput(dentry);  out_unlock_dir: @@ -2613,7 +2993,6 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)  	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))  		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; -	mutex_lock(&fs_info->volume_mutex);  	vol_args = memdup_user(arg, sizeof(*vol_args));  	if (IS_ERR(vol_args)) {  		ret = PTR_ERR(vol_args); @@ -2628,7 +3007,6 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)  	kfree(vol_args);  out: -	mutex_unlock(&fs_info->volume_mutex);  	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);  	return ret;  } @@ -2654,8 +3032,10 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)  	}  	/* Check for compatibility reject unknown flags */ -	if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) -		return -EOPNOTSUPP; +	if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) { +		ret = -EOPNOTSUPP; +		goto out; +	}  	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {  		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; @@ -2954,8 +3334,6 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)  			put_page(pg);  		}  	} -	kfree(cmp->src_pages); -	kfree(cmp->dst_pages);  }  static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, @@ -2964,40 +3342,14 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,  {  	int ret;  	int num_pages = 
PAGE_ALIGN(len) >> PAGE_SHIFT; -	struct page **src_pgarr, **dst_pgarr; -	/* -	 * We must gather up all the pages before we initiate our -	 * extent locking. We use an array for the page pointers. Size -	 * of the array is bounded by len, which is in turn bounded by -	 * BTRFS_MAX_DEDUPE_LEN. -	 */ -	src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); -	dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); -	if (!src_pgarr || !dst_pgarr) { -		kfree(src_pgarr); -		kfree(dst_pgarr); -		return -ENOMEM; -	}  	cmp->num_pages = num_pages; -	cmp->src_pages = src_pgarr; -	cmp->dst_pages = dst_pgarr; - -	/* -	 * If deduping ranges in the same inode, locking rules make it mandatory -	 * to always lock pages in ascending order to avoid deadlocks with -	 * concurrent tasks (such as starting writeback/delalloc). -	 */ -	if (src == dst && dst_loff < loff) { -		swap(src_pgarr, dst_pgarr); -		swap(loff, dst_loff); -	} -	ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff); +	ret = gather_extent_pages(src, cmp->src_pages, num_pages, loff);  	if (ret)  		goto out; -	ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff); +	ret = gather_extent_pages(dst, cmp->dst_pages, num_pages, dst_loff);  out:  	if (ret) @@ -3067,31 +3419,23 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,  	return 0;  } -static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, -			     struct inode *dst, u64 dst_loff) +static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, +				   struct inode *dst, u64 dst_loff, +				   struct cmp_pages *cmp)  {  	int ret;  	u64 len = olen; -	struct cmp_pages cmp;  	bool same_inode = (src == dst);  	u64 same_lock_start = 0;  	u64 same_lock_len = 0; -	if (len == 0) -		return 0; - -	if (same_inode) -		inode_lock(src); -	else -		btrfs_double_inode_lock(src, dst); -  	ret = extent_same_check_offsets(src, loff, &len, olen);  	if (ret) -		goto out_unlock; +		return ret;  	
ret = extent_same_check_offsets(dst, dst_loff, &len, olen);  	if (ret) -		goto out_unlock; +		return ret;  	if (same_inode) {  		/* @@ -3108,32 +3452,21 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,  		 * allow an unaligned length so long as it ends at  		 * i_size.  		 */ -		if (len != olen) { -			ret = -EINVAL; -			goto out_unlock; -		} +		if (len != olen) +			return -EINVAL;  		/* Check for overlapping ranges */ -		if (dst_loff + len > loff && dst_loff < loff + len) { -			ret = -EINVAL; -			goto out_unlock; -		} +		if (dst_loff + len > loff && dst_loff < loff + len) +			return -EINVAL;  		same_lock_start = min_t(u64, loff, dst_loff);  		same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;  	} -	/* don't make the dst file partly checksummed */ -	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != -	    (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { -		ret = -EINVAL; -		goto out_unlock; -	} -  again: -	ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); +	ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, cmp);  	if (ret) -		goto out_unlock; +		return ret;  	if (same_inode)  		ret = lock_extent_range(src, same_lock_start, same_lock_len, @@ -3154,7 +3487,7 @@ again:  		 * Ranges in the io trees already unlocked. Now unlock all  		 * pages before waiting for all IO to complete.  		 
*/ -		btrfs_cmp_data_free(&cmp); +		btrfs_cmp_data_free(cmp);  		if (same_inode) {  			btrfs_wait_ordered_range(src, same_lock_start,  						 same_lock_len); @@ -3167,12 +3500,12 @@ again:  	ASSERT(ret == 0);  	if (WARN_ON(ret)) {  		/* ranges in the io trees already unlocked */ -		btrfs_cmp_data_free(&cmp); +		btrfs_cmp_data_free(cmp);  		return ret;  	}  	/* pass original length for comparison so we stay within i_size */ -	ret = btrfs_cmp_data(olen, &cmp); +	ret = btrfs_cmp_data(olen, cmp);  	if (ret == 0)  		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); @@ -3182,18 +3515,91 @@ again:  	else  		btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); -	btrfs_cmp_data_free(&cmp); +	btrfs_cmp_data_free(cmp); + +	return ret; +} + +#define BTRFS_MAX_DEDUPE_LEN	SZ_16M + +static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, +			     struct inode *dst, u64 dst_loff) +{ +	int ret; +	struct cmp_pages cmp; +	int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT; +	bool same_inode = (src == dst); +	u64 i, tail_len, chunk_count; + +	if (olen == 0) +		return 0; + +	if (same_inode) +		inode_lock(src); +	else +		btrfs_double_inode_lock(src, dst); + +	/* don't make the dst file partly checksummed */ +	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != +	    (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { +		ret = -EINVAL; +		goto out_unlock; +	} + +	tail_len = olen % BTRFS_MAX_DEDUPE_LEN; +	chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN); +	if (chunk_count == 0) +		num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT; + +	/* +	 * If deduping ranges in the same inode, locking rules make it +	 * mandatory to always lock pages in ascending order to avoid deadlocks +	 * with concurrent tasks (such as starting writeback/delalloc). +	 */ +	if (same_inode && dst_loff < loff) +		swap(loff, dst_loff); + +	/* +	 * We must gather up all the pages before we initiate our extent +	 * locking. We use an array for the page pointers. 
Size of the array is +	 * bounded by len, which is in turn bounded by BTRFS_MAX_DEDUPE_LEN. +	 */ +	cmp.src_pages = kvmalloc_array(num_pages, sizeof(struct page *), +				       GFP_KERNEL | __GFP_ZERO); +	cmp.dst_pages = kvmalloc_array(num_pages, sizeof(struct page *), +				       GFP_KERNEL | __GFP_ZERO); +	if (!cmp.src_pages || !cmp.dst_pages) { +		ret = -ENOMEM; +		goto out_free; +	} + +	for (i = 0; i < chunk_count; i++) { +		ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN, +					      dst, dst_loff, &cmp); +		if (ret) +			goto out_unlock; + +		loff += BTRFS_MAX_DEDUPE_LEN; +		dst_loff += BTRFS_MAX_DEDUPE_LEN; +	} + +	if (tail_len > 0) +		ret = btrfs_extent_same_range(src, loff, tail_len, dst, +					      dst_loff, &cmp); +  out_unlock:  	if (same_inode)  		inode_unlock(src);  	else  		btrfs_double_inode_unlock(src, dst); +out_free: +	kvfree(cmp.src_pages); +	kvfree(cmp.dst_pages); +  	return ret;  } -#define BTRFS_MAX_DEDUPE_LEN	SZ_16M -  ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,  				struct file *dst_file, u64 dst_loff)  { @@ -3202,9 +3608,6 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,  	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;  	ssize_t res; -	if (olen > BTRFS_MAX_DEDUPE_LEN) -		olen = BTRFS_MAX_DEDUPE_LEN; -  	if (WARN_ON_ONCE(bs < PAGE_SIZE)) {  		/*  		 * Btrfs does not support blocksize < page_size. 
As a @@ -3826,11 +4229,6 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,  	    src->i_sb != inode->i_sb)  		return -EXDEV; -	/* don't make the dst file partly checksummed */ -	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != -	    (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) -		return -EINVAL; -  	if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))  		return -EISDIR; @@ -3840,6 +4238,13 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,  		inode_lock(src);  	} +	/* don't make the dst file partly checksummed */ +	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != +	    (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { +		ret = -EINVAL; +		goto out_unlock; +	} +  	/* determine range to clone */  	ret = -EINVAL;  	if (off + len > src->i_size || off + len < off) @@ -4007,8 +4412,8 @@ out:  	return ret;  } -void btrfs_get_block_group_info(struct list_head *groups_list, -				struct btrfs_ioctl_space_info *space) +static void get_block_group_info(struct list_head *groups_list, +				 struct btrfs_ioctl_space_info *space)  {  	struct btrfs_block_group_cache *block_group; @@ -4124,8 +4529,8 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,  		down_read(&info->groups_sem);  		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {  			if (!list_empty(&info->block_groups[c])) { -				btrfs_get_block_group_info( -					&info->block_groups[c], &space); +				get_block_group_info(&info->block_groups[c], +						     &space);  				memcpy(dest, &space, sizeof(space));  				dest++;  				space_args.total_spaces++; @@ -4490,14 +4895,14 @@ out_loi:  	return ret;  } -void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, +void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,  			       struct btrfs_ioctl_balance_args *bargs)  {  	struct btrfs_balance_control *bctl = fs_info->balance_ctl;  	bargs->flags = bctl->flags; -	if (atomic_read(&fs_info->balance_running)) +	if 
(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))  		bargs->state |= BTRFS_BALANCE_STATE_RUNNING;  	if (atomic_read(&fs_info->balance_pause_req))  		bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ; @@ -4508,13 +4913,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,  	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));  	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); -	if (lock) { -		spin_lock(&fs_info->balance_lock); -		memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); -		spin_unlock(&fs_info->balance_lock); -	} else { -		memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); -	} +	spin_lock(&fs_info->balance_lock); +	memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); +	spin_unlock(&fs_info->balance_lock);  }  static long btrfs_ioctl_balance(struct file *file, void __user *arg) @@ -4535,7 +4936,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)  again:  	if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { -		mutex_lock(&fs_info->volume_mutex);  		mutex_lock(&fs_info->balance_mutex);  		need_unlock = true;  		goto locked; @@ -4550,21 +4950,22 @@ again:  	mutex_lock(&fs_info->balance_mutex);  	if (fs_info->balance_ctl) {  		/* this is either (2) or (3) */ -		if (!atomic_read(&fs_info->balance_running)) { +		if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {  			mutex_unlock(&fs_info->balance_mutex); -			if (!mutex_trylock(&fs_info->volume_mutex)) -				goto again; +			/* +			 * Lock released to allow other waiters to continue, +			 * we'll reexamine the status again. 
+			 */  			mutex_lock(&fs_info->balance_mutex);  			if (fs_info->balance_ctl && -			    !atomic_read(&fs_info->balance_running)) { +			    !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {  				/* this is (3) */  				need_unlock = false;  				goto locked;  			}  			mutex_unlock(&fs_info->balance_mutex); -			mutex_unlock(&fs_info->volume_mutex);  			goto again;  		} else {  			/* this is (2) */ @@ -4617,7 +5018,6 @@ locked:  		goto out_bargs;  	} -	bctl->fs_info = fs_info;  	if (arg) {  		memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));  		memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta)); @@ -4636,14 +5036,14 @@ locked:  do_balance:  	/* -	 * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP -	 * goes to to btrfs_balance.  bctl is freed in __cancel_balance, -	 * or, if restriper was paused all the way until unmount, in -	 * free_fs_info.  The flag is cleared in __cancel_balance. +	 * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to +	 * btrfs_balance.  bctl is freed in reset_balance_state, or, if +	 * restriper was paused all the way until unmount, in free_fs_info. +	 * The flag should be cleared after reset_balance_state.  	 
*/  	need_unlock = false; -	ret = btrfs_balance(bctl, bargs); +	ret = btrfs_balance(fs_info, bctl, bargs);  	bctl = NULL;  	if (arg) { @@ -4657,7 +5057,6 @@ out_bargs:  	kfree(bargs);  out_unlock:  	mutex_unlock(&fs_info->balance_mutex); -	mutex_unlock(&fs_info->volume_mutex);  	if (need_unlock)  		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);  out: @@ -4701,7 +5100,7 @@ static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,  		goto out;  	} -	update_ioctl_balance_args(fs_info, 1, bargs); +	btrfs_update_ioctl_balance_args(fs_info, bargs);  	if (copy_to_user(arg, bargs, sizeof(*bargs)))  		ret = -EFAULT; @@ -4996,7 +5395,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct btrfs_root_item *root_item = &root->root_item;  	struct btrfs_trans_handle *trans; -	struct timespec ct = current_time(inode); +	struct timespec64 ct = current_time(inode);  	int ret = 0;  	int received_uuid_changed; @@ -5038,8 +5437,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,  				       BTRFS_UUID_SIZE);  	if (received_uuid_changed &&  	    !btrfs_is_empty_uuid(root_item->received_uuid)) { -		ret = btrfs_uuid_tree_rem(trans, fs_info, -					  root_item->received_uuid, +		ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,  					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,  					  root->root_key.objectid);  		if (ret && ret != -ENOENT) { @@ -5063,7 +5461,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,  		goto out;  	}  	if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { -		ret = btrfs_uuid_tree_add(trans, fs_info, sa->uuid, +		ret = btrfs_uuid_tree_add(trans, sa->uuid,  					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,  					  root->root_key.objectid);  		if (ret < 0 && ret != -EEXIST) { @@ -5497,7 +5895,7 @@ long btrfs_ioctl(struct file *file, unsigned int  	case BTRFS_IOC_SYNC: {  		int ret; -		ret = btrfs_start_delalloc_roots(fs_info, 0, -1); +		ret = 
btrfs_start_delalloc_roots(fs_info, -1);  		if (ret)  			return ret;  		ret = btrfs_sync_fs(inode->i_sb, 1); @@ -5565,6 +5963,16 @@ long btrfs_ioctl(struct file *file, unsigned int  		return btrfs_ioctl_get_features(file, argp);  	case BTRFS_IOC_SET_FEATURES:  		return btrfs_ioctl_set_features(file, argp); +	case FS_IOC_FSGETXATTR: +		return btrfs_ioctl_fsgetxattr(file, argp); +	case FS_IOC_FSSETXATTR: +		return btrfs_ioctl_fssetxattr(file, argp); +	case BTRFS_IOC_GET_SUBVOL_INFO: +		return btrfs_ioctl_get_subvol_info(file, argp); +	case BTRFS_IOC_GET_SUBVOL_ROOTREF: +		return btrfs_ioctl_get_subvol_rootref(file, argp); +	case BTRFS_IOC_INO_LOOKUP_USER: +		return btrfs_ioctl_ino_lookup_user(file, argp);  	}  	return -ENOTTY;  | 
