From 1383a7ed67490fb00d793e36c7a4d599ff88a64d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:40:31 +1100 Subject: vfs: check file ranges before cloning files Move the file range checks from vfs_clone_file_prep into a separate generic_remap_checks function so that all the checks are collected in a central location. This forms the basis for adding more checks from generic_write_checks that will make cloning's input checking more consistent with write input checking. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- include/linux/fs.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 897eae8faee1..ba93a6e7dac4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1825,9 +1825,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); -extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, - struct inode *inode_out, loff_t pos_out, - u64 *len, bool is_dedupe); +extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 *count, bool is_dedupe); extern int do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, @@ -2967,6 +2967,9 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); +extern int generic_remap_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + uint64_t *count, bool is_dedupe); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); -- cgit v1.2.3 From a83ab01a62e61616ebb8b97f90f568c1214dc10d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:08 +1100 Subject: vfs: rename vfs_clone_file_prep to be more descriptive The vfs_clone_file_prep is a generic function to be called by filesystem implementations only. Rename the prefix to generic_ and make it more clear that it applies to remap operations, not just clones. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- fs/ocfs2/refcounttree.c | 2 +- fs/read_write.c | 8 ++++---- fs/xfs/xfs_reflink.c | 2 +- include/linux/fs.h | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 19e03936c5e1..36c56dfbe485 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4850,7 +4850,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE)) goto out_unlock; - ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out, + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, &len, is_dedupe); if (ret <= 0) goto out_unlock; diff --git a/fs/read_write.c b/fs/read_write.c index f5395d8da741..aca75a97a695 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1745,9 +1745,9 @@ static int generic_remap_check_len(struct inode *inode_in, * Returns: 0 for "nothing to clone", 1 for "something to clone", or * the usual negative error code. */ -int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 *len, bool is_dedupe) +int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 *len, bool is_dedupe) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1822,7 +1822,7 @@ int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, return 1; } -EXPORT_SYMBOL(vfs_clone_file_prep); +EXPORT_SYMBOL(generic_remap_file_range_prep); int do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 281d5f53f2ec..a7757a128a78 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1326,7 +1326,7 @@ xfs_reflink_remap_prep( if (IS_DAX(inode_in) || IS_DAX(inode_out)) goto out_unlock; - ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out, + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, len, is_dedupe); if (ret <= 0) goto out_unlock; diff --git a/include/linux/fs.h b/include/linux/fs.h index ba93a6e7dac4..55729e1c2e75 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1825,9 +1825,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); -extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 *count, bool is_dedupe); +extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 *count, bool is_dedupe); extern int do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, -- cgit v1.2.3 From 2e5dfc99f2e61c42083ba742395e7a7b353513d1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:21 +1100 Subject: vfs: combine the clone and dedupe into a single remap_file_range Combine the clone_file_range and dedupe_file_range operations into a single remap_file_range file operation dispatch since they're fundamentally the same operation. The differences between the two can be made in the prep functions. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- Documentation/filesystems/porting | 5 +++++ Documentation/filesystems/vfs.txt | 20 +++++++++++------ fs/btrfs/ctree.h | 8 +++---- fs/btrfs/file.c | 3 +-- fs/btrfs/ioctl.c | 45 ++++++++++++++++++++------------------- fs/cifs/cifsfs.c | 22 +++++++++++-------- fs/nfs/nfs4file.c | 10 ++++++--- fs/ocfs2/file.c | 24 +++++++-------------- fs/overlayfs/file.c | 30 +++++++++++++++----------- fs/read_write.c | 18 ++++++++-------- fs/xfs/xfs_file.c | 23 ++++++-------------- include/linux/fs.h | 25 ++++++++++++++++++---- 12 files changed, 127 insertions(+), 106 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 7b7b845c490a..e6d4466268dd 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -622,3 +622,8 @@ in your dentry operations instead. alloc_file_clone(file, flags, ops) does not affect any caller's references. On success you get a new struct file sharing the mount/dentry with the original, on failure - ERR_PTR(). +-- +[mandatory] + ->clone_file_range() and ->dedupe_file_range have been replaced with + ->remap_file_range(). See Documentation/filesystems/vfs.txt for more + information. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index a6c6a8af48a2..6f5babfee27b 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -883,8 +883,9 @@ struct file_operations { unsigned (*mmap_capabilities)(struct file *); #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64); - int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64); + int (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); }; @@ -960,11 +961,16 @@ otherwise noted. copy_file_range: called by the copy_file_range(2) system call. - clone_file_range: called by the ioctl(2) system call for FICLONERANGE and - FICLONE commands. - - dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE - command. + remap_file_range: called by the ioctl(2) system call for FICLONERANGE and + FICLONE and FIDEDUPERANGE commands to remap file ranges. An + implementation should remap len bytes at pos_in of the source file into + the dest file at pos_out. Implementations must handle callers passing + in len == 0; this means "remap to the end of the source file". The + return value should be zero if all bytes were remapped, or the usual + negative error code if the remapping did not succeed completely. + The remap_flags parameter accepts REMAP_FILE_* flags. If + REMAP_FILE_DEDUP is set then the implementation must only remap if the + requested file ranges have identical contents. fadvise: possibly called by the fadvise64() system call. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2cddfe7806a4..124a05662fc2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3218,9 +3218,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_balance_args *bargs); -int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, - struct file *dst_file, loff_t dst_loff, - u64 olen); /* file.c */ int __init btrfs_auto_defrag_init(void); @@ -3250,8 +3247,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, struct extent_state **cached); int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); -int btrfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); +int btrfs_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len, + unsigned int remap_flags); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2be00e873e92..9a963f061393 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3269,8 +3269,7 @@ const struct file_operations btrfs_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_compat_ioctl, #endif - .clone_file_range = btrfs_clone_file_range, - .dedupe_file_range = btrfs_dedupe_file_range, + .remap_file_range = btrfs_remap_file_range, }; void __cold btrfs_auto_defrag_exit(void) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d60b6caf09e8..bfd99c66723e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3627,26 +3627,6 @@ out_unlock: return ret; } -int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, - struct file *dst_file, loff_t dst_loff, - u64 olen) -{ - struct inode *src = file_inode(src_file); - struct inode *dst = file_inode(dst_file); - u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; - - if (WARN_ON_ONCE(bs < PAGE_SIZE)) { - /* - * Btrfs does not support blocksize < page_size. As a - * result, btrfs_cmp_data() won't correctly handle - * this situation without an update. - */ - return -EINVAL; - } - - return btrfs_extent_same(src, src_loff, olen, dst, dst_loff); -} - static int clone_finish_inode_update(struct btrfs_trans_handle *trans, struct inode *inode, u64 endoff, @@ -4348,9 +4328,30 @@ out_unlock: return ret; } -int btrfs_clone_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len) +int btrfs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, u64 len, + unsigned int remap_flags) { + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; + + if (remap_flags & REMAP_FILE_DEDUP) { + struct inode *src = file_inode(src_file); + struct inode *dst = file_inode(dst_file); + u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; + + if (WARN_ON_ONCE(bs < PAGE_SIZE)) { + /* + * Btrfs does not support blocksize < page_size. As a + * result, btrfs_cmp_data() won't correctly handle + * this situation without an update. + */ + return -EINVAL; + } + + return btrfs_extent_same(src, off, len, dst, destoff); + } + return btrfs_clone_files(dst_file, src_file, off, len, destoff); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 7065426b3280..e8144d0dcde2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -975,8 +975,9 @@ const struct inode_operations cifs_symlink_inode_ops = { .listxattr = cifs_listxattr, }; -static int cifs_clone_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len) +static int cifs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, u64 len, + unsigned int remap_flags) { struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); @@ -986,6 +987,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off, unsigned int xid; int rc; + if (remap_flags & ~REMAP_FILE_ADVISORY) + return -EINVAL; + cifs_dbg(FYI, "clone range\n"); xid = get_xid(); @@ -1134,7 +1138,7 @@ const struct file_operations cifs_file_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1153,7 +1157,7 @@ const struct file_operations cifs_file_strict_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1172,7 +1176,7 @@ const struct file_operations cifs_file_direct_ops = { .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1191,7 +1195,7 @@ const struct file_operations cifs_file_nobrl_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1209,7 +1213,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1227,7 +1231,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1239,7 +1243,7 @@ const struct file_operations cifs_dir_ops = { .read = generic_read_dir, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .llseek = generic_file_llseek, .fsync = cifs_dir_fsync, }; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 4288a6ecaf75..ae5780ce41dc 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -180,8 +180,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return nfs42_proc_allocate(filep, offset, len); } -static int nfs42_clone_file_range(struct file *src_file, loff_t src_off, - struct file *dst_file, loff_t dst_off, u64 count) +static int nfs42_remap_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, u64 count, + unsigned int remap_flags) { struct inode *dst_inode = file_inode(dst_file); struct nfs_server *server = NFS_SERVER(dst_inode); @@ -190,6 +191,9 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off, bool same_inode = false; int ret; + if (remap_flags & ~REMAP_FILE_ADVISORY) + return -EINVAL; + /* check alignment w.r.t. clone_blksize */ ret = -EINVAL; if (bs) { @@ -262,7 +266,7 @@ const struct file_operations nfs4_file_operations = { .copy_file_range = nfs4_copy_file_range, .llseek = nfs4_file_llseek, .fallocate = nfs42_fallocate, - .clone_file_range = nfs42_clone_file_range, + .remap_file_range = nfs42_remap_file_range, #else .llseek = nfs_file_llseek, #endif diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9fa35cb6f6e0..0b757a24567c 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2527,24 +2527,18 @@ out: return offset; } -static int ocfs2_file_clone_range(struct file *file_in, +static int ocfs2_remap_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len) + u64 len, + unsigned int remap_flags) { - return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, false); -} + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; -static int ocfs2_file_dedupe_range(struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len) -{ return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, true); + len, remap_flags & REMAP_FILE_DEDUP); } const struct inode_operations ocfs2_file_iops = { @@ -2586,8 +2580,7 @@ const struct file_operations ocfs2_fops = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, - .clone_file_range = ocfs2_file_clone_range, - .dedupe_file_range = ocfs2_file_dedupe_range, + .remap_file_range = ocfs2_remap_file_range, }; const struct file_operations ocfs2_dops = { @@ -2633,8 +2626,7 @@ const struct file_operations ocfs2_fops_no_plocks = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, - .clone_file_range = ocfs2_file_clone_range, - .dedupe_file_range = ocfs2_file_dedupe_range, + .remap_file_range = ocfs2_remap_file_range, }; const struct file_operations ocfs2_dops_no_plocks = { diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 986313da0c88..fffb36fd5920 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -489,26 +489,31 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, OVL_COPY); } -static int ovl_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +static int ovl_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len, unsigned int remap_flags) { - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, - OVL_CLONE); -} + enum ovl_copyop op; + + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; + + if (remap_flags & REMAP_FILE_DEDUP) + op = OVL_DEDUPE; + else + op = OVL_CLONE; -static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) -{ /* * Don't copy up because of a dedupe request, this wouldn't make sense * most of the time (data would be duplicated instead of deduplicated). */ - if (!ovl_inode_upper(file_inode(file_in)) || - !ovl_inode_upper(file_inode(file_out))) + if (op == OVL_DEDUPE && + (!ovl_inode_upper(file_inode(file_in)) || + !ovl_inode_upper(file_inode(file_out)))) return -EPERM; return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, - OVL_DEDUPE); + op); } const struct file_operations ovl_file_operations = { @@ -525,6 +530,5 @@ const struct file_operations ovl_file_operations = { .compat_ioctl = ovl_compat_ioctl, .copy_file_range = ovl_copy_file_range, - .clone_file_range = ovl_clone_file_range, - .dedupe_file_range = ovl_dedupe_file_range, + .remap_file_range = ovl_remap_file_range, }; diff --git a/fs/read_write.c b/fs/read_write.c index 734c5661fb69..766bdcb381f3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1588,9 +1588,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). */ - if (file_in->f_op->clone_file_range) { - ret = file_in->f_op->clone_file_range(file_in, pos_in, - file_out, pos_out, len); + if (file_in->f_op->remap_file_range) { + ret = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, len, 0); if (ret == 0) { ret = len; goto done; @@ -1849,7 +1849,7 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - if (!file_in->f_op->clone_file_range) + if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; ret = remap_verify_area(file_in, pos_in, len, false); @@ -1860,8 +1860,8 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in, if (ret) return ret; - ret = file_in->f_op->clone_file_range(file_in, pos_in, - file_out, pos_out, len); + ret = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, len, 0); if (!ret) { fsnotify_access(file_in); fsnotify_modify(file_out); @@ -2006,7 +2006,7 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, goto out_drop_write; ret = -EINVAL; - if (!dst_file->f_op->dedupe_file_range) + if (!dst_file->f_op->remap_file_range) goto out_drop_write; if (len == 0) { @@ -2014,8 +2014,8 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, goto out_drop_write; } - ret = dst_file->f_op->dedupe_file_range(src_file, src_pos, - dst_file, dst_pos, len); + ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file, + dst_pos, len, REMAP_FILE_DEDUP); out_drop_write: mnt_drop_write_file(dst_file); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 61a5ad2600e8..2ad94d508f80 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -920,27 +920,19 @@ out_unlock: } STATIC int -xfs_file_clone_range( +xfs_file_remap_range( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len) + u64 len, + unsigned int remap_flags) { - return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, false); -} + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; -STATIC int -xfs_file_dedupe_range( - struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len) -{ return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, true); + len, remap_flags & REMAP_FILE_DEDUP); } STATIC int @@ -1175,8 +1167,7 @@ const struct file_operations xfs_file_operations = { .fsync = xfs_file_fsync, .get_unmapped_area = thp_get_unmapped_area, .fallocate = xfs_file_fallocate, - .clone_file_range = xfs_file_clone_range, - .dedupe_file_range = xfs_file_dedupe_range, + .remap_file_range = xfs_file_remap_range, }; const struct file_operations xfs_dir_file_operations = { diff --git a/include/linux/fs.h b/include/linux/fs.h index 55729e1c2e75..888cef35c7d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1721,6 +1721,24 @@ struct block_device_operations; #define NOMMU_VMFLAGS \ (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC) +/* + * These flags control the behavior of the remap_file_range function pointer. + * If it is called with len == 0 that means "remap to end of source file". + * See Documentation/filesystems/vfs.txt for more details about this call. + * + * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) + */ +#define REMAP_FILE_DEDUP (1 << 0) + +/* + * These flags signal that the caller is ok with altering various aspects of + * the behavior of the remap operation. The changes must be made by the + * implementation; the vfs remap helper functions can take advantage of them. + * Flags in this category exist to preserve the quirky behavior of the hoisted + * btrfs clone/dedupe ioctls. + * There are no flags yet, but subsequent commits will add some. + */ +#define REMAP_FILE_ADVISORY (0) struct iov_iter; @@ -1759,10 +1777,9 @@ struct file_operations { #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, - u64); - int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, - u64); + int (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); } __randomize_layout; -- cgit v1.2.3 From a91ae49bbaf43910edb09e03fedf26b23875bd52 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:28 +1100 Subject: vfs: pass remap flags to generic_remap_file_range_prep Plumb the remap flags through the filesystem from the vfs function dispatcher all the way to the prep function to prepare for behavior changes in subsequent patches. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/ocfs2/file.c | 2 +- fs/ocfs2/refcounttree.c | 4 ++-- fs/ocfs2/refcounttree.h | 2 +- fs/read_write.c | 14 +++++++------- fs/xfs/xfs_file.c | 2 +- fs/xfs/xfs_reflink.c | 21 +++++++++++---------- fs/xfs/xfs_reflink.h | 3 ++- include/linux/fs.h | 2 +- 8 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0b757a24567c..9809b0e5746f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2538,7 +2538,7 @@ static int ocfs2_remap_file_range(struct file *file_in, return -EINVAL; return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, remap_flags & REMAP_FILE_DEDUP); + len, remap_flags); } const struct inode_operations ocfs2_file_iops = { diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 36c56dfbe485..df9781567ec0 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4825,7 +4825,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, struct file *file_out, loff_t pos_out, u64 len, - bool is_dedupe) + unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -4851,7 +4851,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, goto out_unlock; ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, - &len, is_dedupe); + &len, remap_flags); if (ret <= 0) goto out_unlock; diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 4af55bf4b35b..d2c5f526edff 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -120,6 +120,6 @@ int ocfs2_reflink_remap_range(struct file *file_in, struct file *file_out, loff_t pos_out, u64 len, - bool is_dedupe); + unsigned int remap_flags); #endif /* OCFS2_REFCOUNTTREE_H */ diff --git a/fs/read_write.c b/fs/read_write.c index 766bdcb381f3..201381689284 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1722,14 +1722,14 @@ static int generic_remap_check_len(struct inode *inode_in, struct inode *inode_out, loff_t pos_out, u64 *len, - bool is_dedupe) + unsigned int remap_flags) { u64 blkmask = i_blocksize(inode_in) - 1; if ((*len & blkmask) == 0) return 0; - if (is_dedupe) + if (remap_flags & REMAP_FILE_DEDUP) *len &= ~blkmask; else if (pos_out + *len < i_size_read(inode_out)) return -EINVAL; @@ -1747,7 +1747,7 @@ static int generic_remap_check_len(struct inode *inode_in, */ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *len, bool is_dedupe) + u64 *len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1771,7 +1771,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, if (*len == 0) { loff_t isize = i_size_read(inode_in); - if (is_dedupe || pos_in == isize) + if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize) return 0; if (pos_in > isize) return -EINVAL; @@ -1782,7 +1782,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, /* Check that we don't violate system file offset limits. */ ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len, - is_dedupe); + (remap_flags & REMAP_FILE_DEDUP)); if (ret) return ret; @@ -1804,7 +1804,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, /* * Check that the extents are the same. */ - if (is_dedupe) { + if (remap_flags & REMAP_FILE_DEDUP) { bool is_same = false; ret = vfs_dedupe_file_range_compare(inode_in, pos_in, @@ -1816,7 +1816,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, } ret = generic_remap_check_len(inode_in, inode_out, pos_out, len, - is_dedupe); + remap_flags); if (ret) return ret; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 2ad94d508f80..20314eb4677a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -932,7 +932,7 @@ xfs_file_remap_range( return -EINVAL; return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, remap_flags & REMAP_FILE_DEDUP); + len, remap_flags); } STATIC int diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index a7757a128a78..29aab196ce7e 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -921,13 +921,14 @@ xfs_reflink_update_dest( struct xfs_inode *dest, xfs_off_t newlen, xfs_extlen_t cowextsize, - bool is_dedupe) + unsigned int remap_flags) { struct xfs_mount *mp = dest->i_mount; struct xfs_trans *tp; int error; - if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) + if ((remap_flags & REMAP_FILE_DEDUP) && + newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); @@ -948,7 +949,7 @@ xfs_reflink_update_dest( dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; } - if (!is_dedupe) { + if (!(remap_flags & REMAP_FILE_DEDUP)) { xfs_trans_ichgtime(tp, dest, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } @@ -1296,7 +1297,7 @@ xfs_reflink_remap_prep( struct file *file_out, loff_t pos_out, u64 *len, - bool is_dedupe) + unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); @@ -1327,7 +1328,7 @@ xfs_reflink_remap_prep( goto out_unlock; ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, - len, is_dedupe); + len, remap_flags); if (ret <= 0) goto out_unlock; @@ -1336,7 +1337,7 @@ xfs_reflink_remap_prep( * from the source file so we don't try to dedupe the partial * EOF block. */ - if (is_dedupe) { + if (remap_flags & REMAP_FILE_DEDUP) { *len &= ~blkmask; } else if (*len & blkmask) { /* @@ -1372,7 +1373,7 @@ xfs_reflink_remap_prep( PAGE_ALIGN(pos_out + *len) - 1); /* If we're altering the file contents... */ - if (!is_dedupe) { + if (!(remap_flags & REMAP_FILE_DEDUP)) { /* * ...update the timestamps (which will grab the ilock again * from xfs_fs_dirty_inode, so we have to call it before we @@ -1410,7 +1411,7 @@ xfs_reflink_remap_range( struct file *file_out, loff_t pos_out, u64 len, - bool is_dedupe) + unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); @@ -1430,7 +1431,7 @@ xfs_reflink_remap_range( /* Prepare and then clone file data. */ ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, - &len, is_dedupe); + &len, remap_flags); if (ret <= 0) return ret; @@ -1457,7 +1458,7 @@ xfs_reflink_remap_range( cowextsize = src->i_d.di_cowextsize; ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, - is_dedupe); + remap_flags); out_unlock: xfs_reflink_remap_unlock(file_in, file_out); diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index c585ad9552b2..6f82d628bf17 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -28,7 +28,8 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe); + struct file *file_out, loff_t pos_out, u64 len, + unsigned int remap_flags); extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp, struct xfs_inode *ip, bool *has_shared); extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, diff --git a/include/linux/fs.h b/include/linux/fs.h index 888cef35c7d7..631c28ce1436 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1844,7 +1844,7 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *count, bool is_dedupe); + u64 *count, unsigned int remap_flags); extern int do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, -- cgit v1.2.3 From 3d28193e1df043764deb7abdaba5e3a6660bc393 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:34 +1100 Subject: vfs: pass remap flags to generic_remap_checks Pass the same remap flags to generic_remap_checks for consistency. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/read_write.c | 2 +- include/linux/fs.h | 2 +- mm/filemap.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/read_write.c b/fs/read_write.c index 201381689284..ebcbfc4f2907 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1782,7 +1782,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, /* Check that we don't violate system file offset limits. */ ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len, - (remap_flags & REMAP_FILE_DEDUP)); + remap_flags); if (ret) return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index 631c28ce1436..c5435ca81132 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2986,7 +2986,7 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - uint64_t *count, bool is_dedupe); + uint64_t *count, unsigned int remap_flags); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index 84b7301e41a0..410dc58f7b16 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2994,7 +2994,7 @@ EXPORT_SYMBOL(generic_write_checks); */ int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - uint64_t *req_count, bool is_dedupe) + uint64_t *req_count, unsigned int remap_flags) { struct inode *inode_in = file_in->f_mapping->host; struct inode *inode_out = file_out->f_mapping->host; @@ -3016,7 +3016,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in, size_out = i_size_read(inode_out); /* Dedupe requires both ranges to be within EOF. */ - if (is_dedupe && + if ((remap_flags & REMAP_FILE_DEDUP) && (pos_in >= size_in || pos_in + count > size_in || pos_out >= size_out || pos_out + count > size_out)) return -EINVAL; -- cgit v1.2.3 From 42ec3d4c02187a18e27ff94b409ec27234bf2ffd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:49 +1100 Subject: vfs: make remap_file_range functions take and return bytes completed Change the remap_file_range functions to take a number of bytes to operate upon and return the number of bytes they operated on. This is a requirement for allowing fs implementations to return short clone/dedupe results to the user, which will enable us to obey resource limits in a graceful manner. A subsequent patch will enable copy_file_range to signal to the ->clone_file_range implementation that it can handle a short length, which will be returned in the function's return value. For now the short return is not implemented anywhere so the behavior won't change -- either copy_file_range manages to clone the entire range or it tries an alternative. Neither clone ioctl can take advantage of this, alas. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- Documentation/filesystems/vfs.txt | 10 ++++---- fs/btrfs/ctree.h | 6 ++--- fs/btrfs/ioctl.c | 13 +++++++---- fs/cifs/cifsfs.c | 6 ++--- fs/ioctl.c | 10 +++++++- fs/nfs/nfs4file.c | 6 ++--- fs/nfsd/vfs.c | 8 +++++-- fs/ocfs2/file.c | 16 ++++++------- fs/ocfs2/refcounttree.c | 2 +- fs/ocfs2/refcounttree.h | 2 +- fs/overlayfs/copy_up.c | 6 ++--- fs/overlayfs/file.c | 12 +++++----- fs/read_write.c | 49 +++++++++++++++++++++------------------ fs/xfs/xfs_file.c | 9 ++++--- fs/xfs/xfs_reflink.c | 4 ++-- fs/xfs/xfs_reflink.h | 2 +- include/linux/fs.h | 27 +++++++++++---------- mm/filemap.c | 2 +- 18 files changed, 108 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 6f5babfee27b..1bd2919deaca 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -883,9 +883,9 @@ struct file_operations { unsigned (*mmap_capabilities)(struct file *); #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*remap_file_range)(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len, unsigned int remap_flags); + loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); }; @@ -966,8 +966,8 @@ otherwise noted. implementation should remap len bytes at pos_in of the source file into the dest file at pos_out. Implementations must handle callers passing in len == 0; this means "remap to the end of the source file". The - return value should be zero if all bytes were remapped, or the usual - negative error code if the remapping did not succeed completely. + return value should the number of bytes remapped, or the usual + negative error code if errors occurred before any bytes were remapped. The remap_flags parameter accepts REMAP_FILE_* flags. If REMAP_FILE_DEDUP is set then the implementation must only remap if the requested file ranges have identical contents. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 124a05662fc2..771a961d77ad 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3247,9 +3247,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, struct extent_state **cached); int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); -int btrfs_remap_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len, - unsigned int remap_flags); +loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bfd99c66723e..b0c513e10977 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4328,10 +4328,12 @@ out_unlock: return ret; } -int btrfs_remap_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len, +loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, loff_t len, unsigned int remap_flags) { + int ret; + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; @@ -4349,10 +4351,11 @@ int btrfs_remap_file_range(struct file *src_file, loff_t off, return -EINVAL; } - return btrfs_extent_same(src, off, len, dst, destoff); + ret = btrfs_extent_same(src, off, len, dst, destoff); + } else { + ret = btrfs_clone_files(dst_file, src_file, off, len, destoff); } - - return btrfs_clone_files(dst_file, src_file, off, len, destoff); + return ret < 0 ? ret : len; } static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e8144d0dcde2..5ca71c6c8be2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -975,8 +975,8 @@ const struct inode_operations cifs_symlink_inode_ops = { .listxattr = cifs_listxattr, }; -static int cifs_remap_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len, +static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, loff_t len, unsigned int remap_flags) { struct inode *src_inode = file_inode(src_file); @@ -1029,7 +1029,7 @@ static int cifs_remap_file_range(struct file *src_file, loff_t off, unlock_two_nondirectories(src_inode, target_inode); out: free_xid(xid); - return rc; + return rc < 0 ? rc : len; } ssize_t cifs_file_copychunk_range(unsigned int xid, diff --git a/fs/ioctl.c b/fs/ioctl.c index 2005529af560..72537b68c272 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -223,6 +223,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { struct fd src_file = fdget(srcfd); + loff_t cloned; int ret; if (!src_file.file) @@ -230,7 +231,14 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, ret = -EXDEV; if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; - ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); + cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff, + olen); + if (cloned < 0) + ret = cloned; + else if (olen && cloned != olen) + ret = -EINVAL; + else + ret = 0; fdput: fdput(src_file); return ret; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index ae5780ce41dc..46d691ba04bc 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -180,8 +180,8 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return nfs42_proc_allocate(filep, offset, len); } -static int nfs42_remap_file_range(struct file *src_file, loff_t src_off, - struct file *dst_file, loff_t dst_off, u64 count, +static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, loff_t count, unsigned int remap_flags) { struct inode *dst_inode = file_inode(dst_file); @@ -244,7 +244,7 @@ out_unlock: inode_unlock(src_inode); } out: - return ret; + return ret < 0 ? ret : count; } #endif /* CONFIG_NFS_V4_2 */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b53e76391e52..ac6cb6101cbe 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -541,8 +541,12 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { - return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, - count)); + loff_t cloned; + + cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count); + if (count && cloned != count) + cloned = -EINVAL; + return nfserrno(cloned < 0 ? cloned : 0); } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9809b0e5746f..fbaeafe44b5f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2527,18 +2527,18 @@ out: return offset; } -static int ocfs2_remap_file_range(struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len, - unsigned int remap_flags) +static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { + int ret; + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; - return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, remap_flags); + ret = ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, + len, remap_flags); + return ret < 0 ? ret : len; } const struct inode_operations ocfs2_file_iops = { diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index df9781567ec0..6a42c04ac0ab 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4824,7 +4824,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + loff_t len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index d2c5f526edff..eb65c1d0843c 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -119,7 +119,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + loff_t len, unsigned int remap_flags); #endif /* OCFS2_REFCOUNTTREE_H */ diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 1cc797a08a5b..8750b7235516 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -125,6 +125,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) struct file *new_file; loff_t old_pos = 0; loff_t new_pos = 0; + loff_t cloned; int error = 0; if (len == 0) @@ -141,11 +142,10 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) } /* Try to use clone_file_range to clone up within the same fs */ - error = do_clone_file_range(old_file, 0, new_file, 0, len); - if (!error) + cloned = do_clone_file_range(old_file, 0, new_file, 0, len); + if (cloned == len) goto out; /* Couldn't clone, so now we try to copy the data */ - error = 0; /* FIXME: copy up sparse files efficiently */ while (len) { diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index fffb36fd5920..6c3fec6168e9 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -434,14 +434,14 @@ enum ovl_copyop { OVL_DEDUPE, }; -static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in, +static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, unsigned int flags, enum ovl_copyop op) + loff_t len, unsigned int flags, enum ovl_copyop op) { struct inode *inode_out = file_inode(file_out); struct fd real_in, real_out; const struct cred *old_cred; - ssize_t ret; + loff_t ret; ret = ovl_real_fdget(file_out, &real_out); if (ret) @@ -489,9 +489,9 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, OVL_COPY); } -static int ovl_remap_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len, unsigned int remap_flags) +static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { enum ovl_copyop op; diff --git a/fs/read_write.c b/fs/read_write.c index b61bd3fc7154..356641afa487 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1589,10 +1589,13 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * more efficient if both clone and copy are supported (e.g. NFS). */ if (file_in->f_op->remap_file_range) { - ret = file_in->f_op->remap_file_range(file_in, pos_in, - file_out, pos_out, len, 0); - if (ret == 0) { - ret = len; + loff_t cloned; + + cloned = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, + min_t(loff_t, MAX_RW_COUNT, len), 0); + if (cloned > 0) { + ret = cloned; goto done; } } @@ -1686,11 +1689,12 @@ out2: return ret; } -static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write) +static int remap_verify_area(struct file *file, loff_t pos, loff_t len, + bool write) { struct inode *inode = file_inode(file); - if (unlikely(pos < 0)) + if (unlikely(pos < 0 || len < 0)) return -EINVAL; if (unlikely((loff_t) (pos + len) < 0)) @@ -1721,7 +1725,7 @@ static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write) static int generic_remap_check_len(struct inode *inode_in, struct inode *inode_out, loff_t pos_out, - u64 *len, + loff_t *len, unsigned int remap_flags) { u64 blkmask = i_blocksize(inode_in) - 1; @@ -1747,7 +1751,7 @@ static int generic_remap_check_len(struct inode *inode_in, */ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *len, unsigned int remap_flags) + loff_t *len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1843,12 +1847,12 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_remap_file_range_prep); -int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, loff_t len) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); - int ret; + loff_t ret; if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; @@ -1881,19 +1885,19 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in, ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, len, 0); - if (!ret) { - fsnotify_access(file_in); - fsnotify_modify(file_out); - } + if (ret < 0) + return ret; + fsnotify_access(file_in); + fsnotify_modify(file_out); return ret; } EXPORT_SYMBOL(do_clone_file_range); -int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, loff_t len) { - int ret; + loff_t ret; file_start_write(file_out); ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); @@ -1999,10 +2003,11 @@ out_error: } EXPORT_SYMBOL(vfs_dedupe_file_range_compare); -int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, - struct file *dst_file, loff_t dst_pos, u64 len) +loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, + struct file *dst_file, loff_t dst_pos, + loff_t len) { - s64 ret; + loff_t ret; ret = mnt_want_write_file(dst_file); if (ret) @@ -2051,7 +2056,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) int i; int ret; u16 count = same->dest_count; - int deduped; + loff_t deduped; if (!(file->f_mode & FMODE_READ)) return -EINVAL; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 20314eb4677a..38fde4e11714 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -919,20 +919,23 @@ out_unlock: return error; } -STATIC int +STATIC loff_t xfs_file_remap_range( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + loff_t len, unsigned int remap_flags) { + int ret; + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; - return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, + ret = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, remap_flags); + return ret < 0 ? ret : len; } STATIC int diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 2d7dd8b28d7c..3dbe5fb7e9c0 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1296,7 +1296,7 @@ xfs_reflink_remap_prep( loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *len, + loff_t *len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); @@ -1387,7 +1387,7 @@ xfs_reflink_remap_range( loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + loff_t len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 6f82d628bf17..c3c46c276fe1 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -28,7 +28,7 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len, + struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp, struct xfs_inode *ip, bool *has_shared); diff --git a/include/linux/fs.h b/include/linux/fs.h index c5435ca81132..c72d8c3c065a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1777,9 +1777,9 @@ struct file_operations { #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*remap_file_range)(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len, unsigned int remap_flags); + loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); } __randomize_layout; @@ -1844,19 +1844,22 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *count, unsigned int remap_flags); -extern int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); -extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); + loff_t *count, + unsigned int remap_flags); +extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len); +extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len); extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, - struct file *dst_file, loff_t dst_pos, - u64 len); +extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, + struct file *dst_file, loff_t dst_pos, + loff_t len); struct super_operations { @@ -2986,7 +2989,7 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - uint64_t *count, unsigned int remap_flags); + loff_t *count, unsigned int remap_flags); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index 410dc58f7b16..e9091d731f84 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2994,7 +2994,7 @@ EXPORT_SYMBOL(generic_write_checks); */ int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - uint64_t *req_count, unsigned int remap_flags) + loff_t *req_count, unsigned int remap_flags) { struct inode *inode_in = file_in->f_mapping->host; struct inode *inode_out = file_out->f_mapping->host; -- cgit v1.2.3 From 452ce65951a2f0719e4e119ecca134c06cfe22ee Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:56 +1100 Subject: vfs: plumb remap flags through the vfs clone functions Plumb a remap_flags argument through the {do,vfs}_clone_file_range functions so that clone can take advantage of it. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- fs/ioctl.c | 2 +- fs/nfsd/vfs.c | 2 +- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/file.c | 6 +++--- fs/read_write.c | 13 +++++++++---- include/linux/fs.h | 4 ++-- 6 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/ioctl.c b/fs/ioctl.c index 72537b68c272..505275ec5596 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -232,7 +232,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff, - olen); + olen, 0); if (cloned < 0) ret = cloned; else if (olen && cloned != olen) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ac6cb6101cbe..726fc5b2b27a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -543,7 +543,7 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, { loff_t cloned; - cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count); + cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); if (count && cloned != count) cloned = -EINVAL; return nfserrno(cloned < 0 ? cloned : 0); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 8750b7235516..5f82fece64a0 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -142,7 +142,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) } /* Try to use clone_file_range to clone up within the same fs */ - cloned = do_clone_file_range(old_file, 0, new_file, 0, len); + cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); if (cloned == len) goto out; /* Couldn't clone, so now we try to copy the data */ diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 6c3fec6168e9..0393815c8971 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -462,7 +462,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, case OVL_CLONE: ret = vfs_clone_file_range(real_in.file, pos_in, - real_out.file, pos_out, len); + real_out.file, pos_out, len, flags); break; case OVL_DEDUPE: @@ -512,8 +512,8 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, !ovl_inode_upper(file_inode(file_out)))) return -EPERM; - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, - op); + return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, + remap_flags, op); } const struct file_operations ovl_file_operations = { diff --git a/fs/read_write.c b/fs/read_write.c index 356641afa487..0d1ac1b9bc22 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1848,12 +1848,15 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, EXPORT_SYMBOL(generic_remap_file_range_prep); loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, loff_t len) + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); loff_t ret; + WARN_ON_ONCE(remap_flags); + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) @@ -1884,7 +1887,7 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, return ret; ret = file_in->f_op->remap_file_range(file_in, pos_in, - file_out, pos_out, len, 0); + file_out, pos_out, len, remap_flags); if (ret < 0) return ret; @@ -1895,12 +1898,14 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, EXPORT_SYMBOL(do_clone_file_range); loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, loff_t len) + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { loff_t ret; file_start_write(file_out); - ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); + ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len, + remap_flags); file_end_write(file_out); return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index c72d8c3c065a..1c5e55d2a67d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1848,10 +1848,10 @@ extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, unsigned int remap_flags); extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - loff_t len); + loff_t len, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - loff_t len); + loff_t len, unsigned int remap_flags); extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same); -- cgit v1.2.3 From df3658361951e17364f1e1c3fa92862a990ad8bd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:42:03 +1100 Subject: vfs: plumb remap flags through the vfs dedupe functions Plumb a remap_flags argument through the vfs_dedupe_file_range_one functions so that dedupe can take advantage of it. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- fs/overlayfs/file.c | 3 ++- fs/read_write.c | 9 ++++++--- include/linux/fs.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 0393815c8971..84dd957efa24 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -467,7 +467,8 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, case OVL_DEDUPE: ret = vfs_dedupe_file_range_one(real_in.file, pos_in, - real_out.file, pos_out, len); + real_out.file, pos_out, len, + flags); break; } revert_creds(old_cred); diff --git a/fs/read_write.c b/fs/read_write.c index 0d1ac1b9bc22..ea30666013b0 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -2010,10 +2010,12 @@ EXPORT_SYMBOL(vfs_dedupe_file_range_compare); loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, - loff_t len) + loff_t len, unsigned int remap_flags) { loff_t ret; + WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP)); + ret = mnt_want_write_file(dst_file); if (ret) return ret; @@ -2044,7 +2046,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, } ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file, - dst_pos, len, REMAP_FILE_DEDUP); + dst_pos, len, remap_flags | REMAP_FILE_DEDUP); out_drop_write: mnt_drop_write_file(dst_file); @@ -2112,7 +2114,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) } deduped = vfs_dedupe_file_range_one(file, off, dst_file, - info->dest_offset, len); + info->dest_offset, len, + 0); if (deduped == -EBADE) info->status = FILE_DEDUPE_RANGE_DIFFERS; else if (deduped < 0) diff --git a/include/linux/fs.h b/include/linux/fs.h index 1c5e55d2a67d..544ab5083b48 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1859,7 +1859,7 @@ extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, - loff_t len); + loff_t len, unsigned int remap_flags); struct super_operations { -- cgit v1.2.3 From eca3654e3cc7d93e9734d0fa96cfb15c7f356244 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:42:10 +1100 Subject: vfs: enable remap callers that can handle short operations Plumb in a remap flag that enables the filesystem remap handler to shorten remapping requests for callers that can handle it. Now copy_file_range can report partial success (in case we run up against alignment problems, resource limits, etc.). We also enable CAN_SHORTEN for fideduperange to maintain existing userspace-visible behavior where xfs/btrfs shorten the dedupe range to avoid stale post-eof data exposure. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- Documentation/filesystems/vfs.txt | 4 +++- fs/read_write.c | 28 ++++++++++++++++++++-------- include/linux/fs.h | 5 +++-- mm/filemap.c | 11 +++++++---- 4 files changed, 33 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 1bd2919deaca..5f71a252e2e0 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -970,7 +970,9 @@ otherwise noted. negative error code if errors occurred before any bytes were remapped. The remap_flags parameter accepts REMAP_FILE_* flags. If REMAP_FILE_DEDUP is set then the implementation must only remap if the - requested file ranges have identical contents. + requested file ranges have identical contents. If REMAP_CAN_SHORTEN is + set, the caller is ok with the implementation shortening the request + length to satisfy alignment or EOF requirements (or any other reason). fadvise: possibly called by the fadvise64() system call. diff --git a/fs/read_write.c b/fs/read_write.c index ea30666013b0..c0bcc1a20650 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1593,7 +1593,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, cloned = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, - min_t(loff_t, MAX_RW_COUNT, len), 0); + min_t(loff_t, MAX_RW_COUNT, len), + REMAP_FILE_CAN_SHORTEN); if (cloned > 0) { ret = cloned; goto done; @@ -1721,6 +1722,8 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len, * can't meaningfully compare post-EOF contents. * * For clone we only link a partial EOF block above the destination file's EOF. + * + * Shorten the request if possible. */ static int generic_remap_check_len(struct inode *inode_in, struct inode *inode_out, @@ -1729,16 +1732,24 @@ static int generic_remap_check_len(struct inode *inode_in, unsigned int remap_flags) { u64 blkmask = i_blocksize(inode_in) - 1; + loff_t new_len = *len; if ((*len & blkmask) == 0) return 0; - if (remap_flags & REMAP_FILE_DEDUP) - *len &= ~blkmask; - else if (pos_out + *len < i_size_read(inode_out)) - return -EINVAL; + if ((remap_flags & REMAP_FILE_DEDUP) || + pos_out + *len < i_size_read(inode_out)) + new_len &= ~blkmask; - return 0; + if (new_len == *len) + return 0; + + if (remap_flags & REMAP_FILE_CAN_SHORTEN) { + *len = new_len; + return 0; + } + + return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL; } /* @@ -2014,7 +2025,8 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, { loff_t ret; - WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP)); + WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP | + REMAP_FILE_CAN_SHORTEN)); ret = mnt_want_write_file(dst_file); if (ret) @@ -2115,7 +2127,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) deduped = vfs_dedupe_file_range_one(file, off, dst_file, info->dest_offset, len, - 0); + REMAP_FILE_CAN_SHORTEN); if (deduped == -EBADE) info->status = FILE_DEDUPE_RANGE_DIFFERS; else if (deduped < 0) diff --git a/include/linux/fs.h b/include/linux/fs.h index 544ab5083b48..34c22d695011 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1727,8 +1727,10 @@ struct block_device_operations; * See Documentation/filesystems/vfs.txt for more details about this call. * * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) + * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request */ #define REMAP_FILE_DEDUP (1 << 0) +#define REMAP_FILE_CAN_SHORTEN (1 << 1) /* * These flags signal that the caller is ok with altering various aspects of @@ -1736,9 +1738,8 @@ struct block_device_operations; * implementation; the vfs remap helper functions can take advantage of them. * Flags in this category exist to preserve the quirky behavior of the hoisted * btrfs clone/dedupe ioctls. - * There are no flags yet, but subsequent commits will add some. */ -#define REMAP_FILE_ADVISORY (0) +#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) struct iov_iter; diff --git a/mm/filemap.c b/mm/filemap.c index e9091d731f84..1775d4ad3317 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3045,8 +3045,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in, bcount = ALIGN(size_in, bs) - pos_in; } else { if (!IS_ALIGNED(count, bs)) - return -EINVAL; - + count = ALIGN_DOWN(count, bs); bcount = count; } @@ -3056,10 +3055,14 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in, pos_out < pos_in + bcount) return -EINVAL; - /* For now we don't support changing the length. */ - if (*req_count != count) + /* + * We shortened the request but the caller can't deal with that, so + * bounce the request back to userspace. + */ + if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN)) return -EINVAL; + *req_count = count; return 0; } -- cgit v1.2.3 From c32e5f39953fa6bbff35c655bdcb7b3128f1e79f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:42:17 +1100 Subject: vfs: hide file range comparison function There are no callers of vfs_dedupe_file_range_compare, so we might as well make it a static helper and remove the export. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/read_write.c | 187 ++++++++++++++++++++++++++--------------------------- include/linux/fs.h | 3 - 2 files changed, 91 insertions(+), 99 deletions(-) (limited to 'include') diff --git a/fs/read_write.c b/fs/read_write.c index c0bcc1a20650..e4d295d0d236 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1752,6 +1752,97 @@ static int generic_remap_check_len(struct inode *inode_in, return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL; } +/* + * Read a page's worth of file data into the page cache. Return the page + * locked. + */ +static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) +{ + struct page *page; + + page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL); + if (IS_ERR(page)) + return page; + if (!PageUptodate(page)) { + put_page(page); + return ERR_PTR(-EIO); + } + lock_page(page); + return page; +} + +/* + * Compare extents of two files to see if they are the same. + * Caller must have locked both inodes to prevent write races. + */ +static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same) +{ + loff_t src_poff; + loff_t dest_poff; + void *src_addr; + void *dest_addr; + struct page *src_page; + struct page *dest_page; + loff_t cmp_len; + bool same; + int error; + + error = -EINVAL; + same = true; + while (len) { + src_poff = srcoff & (PAGE_SIZE - 1); + dest_poff = destoff & (PAGE_SIZE - 1); + cmp_len = min(PAGE_SIZE - src_poff, + PAGE_SIZE - dest_poff); + cmp_len = min(cmp_len, len); + if (cmp_len <= 0) + goto out_error; + + src_page = vfs_dedupe_get_page(src, srcoff); + if (IS_ERR(src_page)) { + error = PTR_ERR(src_page); + goto out_error; + } + dest_page = vfs_dedupe_get_page(dest, destoff); + if (IS_ERR(dest_page)) { + error = PTR_ERR(dest_page); + unlock_page(src_page); + put_page(src_page); + goto out_error; + } + src_addr = kmap_atomic(src_page); + dest_addr = kmap_atomic(dest_page); + + flush_dcache_page(src_page); + flush_dcache_page(dest_page); + + if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) + same = false; + + kunmap_atomic(dest_addr); + kunmap_atomic(src_addr); + unlock_page(dest_page); + unlock_page(src_page); + put_page(dest_page); + put_page(src_page); + + if (!same) + break; + + srcoff += cmp_len; + destoff += cmp_len; + len -= cmp_len; + } + + *is_same = same; + return 0; + +out_error: + return error; +} + /* * Check that the two inodes are eligible for cloning, the ranges make * sense, and then flush all dirty data. Caller must ensure that the @@ -1923,102 +2014,6 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(vfs_clone_file_range); -/* - * Read a page's worth of file data into the page cache. Return the page - * locked. - */ -static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) -{ - struct address_space *mapping; - struct page *page; - pgoff_t n; - - n = offset >> PAGE_SHIFT; - mapping = inode->i_mapping; - page = read_mapping_page(mapping, n, NULL); - if (IS_ERR(page)) - return page; - if (!PageUptodate(page)) { - put_page(page); - return ERR_PTR(-EIO); - } - lock_page(page); - return page; -} - -/* - * Compare extents of two files to see if they are the same. - * Caller must have locked both inodes to prevent write races. - */ -int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, - struct inode *dest, loff_t destoff, - loff_t len, bool *is_same) -{ - loff_t src_poff; - loff_t dest_poff; - void *src_addr; - void *dest_addr; - struct page *src_page; - struct page *dest_page; - loff_t cmp_len; - bool same; - int error; - - error = -EINVAL; - same = true; - while (len) { - src_poff = srcoff & (PAGE_SIZE - 1); - dest_poff = destoff & (PAGE_SIZE - 1); - cmp_len = min(PAGE_SIZE - src_poff, - PAGE_SIZE - dest_poff); - cmp_len = min(cmp_len, len); - if (cmp_len <= 0) - goto out_error; - - src_page = vfs_dedupe_get_page(src, srcoff); - if (IS_ERR(src_page)) { - error = PTR_ERR(src_page); - goto out_error; - } - dest_page = vfs_dedupe_get_page(dest, destoff); - if (IS_ERR(dest_page)) { - error = PTR_ERR(dest_page); - unlock_page(src_page); - put_page(src_page); - goto out_error; - } - src_addr = kmap_atomic(src_page); - dest_addr = kmap_atomic(dest_page); - - flush_dcache_page(src_page); - flush_dcache_page(dest_page); - - if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) - same = false; - - kunmap_atomic(dest_addr); - kunmap_atomic(src_addr); - unlock_page(dest_page); - unlock_page(src_page); - put_page(dest_page); - put_page(src_page); - - if (!same) - break; - - srcoff += cmp_len; - destoff += cmp_len; - len -= cmp_len; - } - - *is_same = same; - return 0; - -out_error: - return error; -} -EXPORT_SYMBOL(vfs_dedupe_file_range_compare); - loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, loff_t len, unsigned int remap_flags) diff --git a/include/linux/fs.h b/include/linux/fs.h index 34c22d695011..346036a84f18 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1853,9 +1853,6 @@ extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); -extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, - struct inode *dest, loff_t destoff, - loff_t len, bool *is_same); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, -- cgit v1.2.3