From 640eb7e7b5242af53c456552a526d0080e6333f8 Mon Sep 17 00:00:00 2001 From: Mickaël Salaün Date: Mon, 14 Nov 2016 22:14:35 +0100 Subject: fs: Constify path_is_under()'s arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function path_is_under() doesn't modify the paths pointed to by its arguments but only browses them. Constifying these pointers makes a cleaner interface to be used by (future) code which may only have access to const struct path pointers (e.g. LSM hooks). Signed-off-by: Mickaël Salaün Cc: Alexander Viro Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index dc0478c07b2a..f96501b51c49 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2709,7 +2709,7 @@ extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern bool is_subdir(struct dentry *, struct dentry *); -extern bool path_is_under(struct path *, struct path *); +extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); -- cgit v1.2.3 From ca71cf71eeda04dc9ad18271504e499013af5415 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 20 Nov 2016 19:45:28 -0500 Subject: namespace.c: constify struct path passed to a bunch of primitives Signed-off-by: Al Viro --- fs/internal.h | 2 +- fs/namespace.c | 8 ++++---- include/linux/fs.h | 2 +- include/linux/mount.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/internal.h b/fs/internal.h index f4da3341b4a3..3e460159d835 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -62,7 +62,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, extern void *copy_mount_options(const void __user *); extern char *copy_mount_string(const void __user *); -extern struct vfsmount *lookup_mnt(struct path *); 
+extern struct vfsmount *lookup_mnt(const struct path *); extern int finish_automount(struct vfsmount *, struct path *); extern int sb_prepare_remount_readonly(struct super_block *); diff --git a/fs/namespace.c b/fs/namespace.c index 4d80a5066a1f..9ad88a45b3e3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -678,7 +678,7 @@ out: * * lookup_mnt takes a reference to the found vfsmount. */ -struct vfsmount *lookup_mnt(struct path *path) +struct vfsmount *lookup_mnt(const struct path *path) { struct mount *child_mnt; struct vfsmount *m; @@ -1159,7 +1159,7 @@ struct vfsmount *mntget(struct vfsmount *mnt) } EXPORT_SYMBOL(mntget); -struct vfsmount *mnt_clone_internal(struct path *path) +struct vfsmount *mnt_clone_internal(const struct path *path) { struct mount *p; p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); @@ -1758,7 +1758,7 @@ out: /* Caller should check returned pointer for errors */ -struct vfsmount *collect_mounts(struct path *path) +struct vfsmount *collect_mounts(const struct path *path) { struct mount *tree; namespace_lock(); @@ -1791,7 +1791,7 @@ void drop_collected_mounts(struct vfsmount *mnt) * * Release with mntput(). 
*/ -struct vfsmount *clone_private_mount(struct path *path) +struct vfsmount *clone_private_mount(const struct path *path) { struct mount *old_mnt = real_mount(path->mnt); struct mount *new_mnt; diff --git a/include/linux/fs.h b/include/linux/fs.h index f96501b51c49..3056fe46f336 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2123,7 +2123,7 @@ extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); extern long do_mount(const char *, const char __user *, const char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(struct path *); +extern struct vfsmount *collect_mounts(const struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, struct vfsmount *); diff --git a/include/linux/mount.h b/include/linux/mount.h index 1172cce949a4..cf2b5784b649 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -79,12 +79,12 @@ extern void mnt_drop_write(struct vfsmount *mnt); extern void mnt_drop_write_file(struct file *file); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); -extern struct vfsmount *mnt_clone_internal(struct path *path); +extern struct vfsmount *mnt_clone_internal(const struct path *path); extern int __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); struct path; -extern struct vfsmount *clone_private_mount(struct path *path); +extern struct vfsmount *clone_private_mount(const struct path *path); struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, -- cgit v1.2.3 From f0bb5aaf2c51267c49ed5e2c6103df22acfe30f5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 20 Nov 2016 20:27:12 -0500 Subject: vfs: misc struct path constification Signed-off-by: Al Viro --- fs/namei.c | 4 ++-- fs/statfs.c | 2 +- fs/utimes.c | 2 +- include/linux/fs.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) 
(limited to 'include/linux/fs.h') diff --git a/fs/namei.c b/fs/namei.c index 5b4eed221530..1c8f4386b03f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2895,7 +2895,7 @@ bool may_open_dev(const struct path *path) !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV); } -static int may_open(struct path *path, int acc_mode, int flag) +static int may_open(const struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; struct inode *inode = dentry->d_inode; @@ -2945,7 +2945,7 @@ static int may_open(struct path *path, int acc_mode, int flag) static int handle_truncate(struct file *filp) { - struct path *path = &filp->f_path; + const struct path *path = &filp->f_path; struct inode *inode = path->dentry->d_inode; int error = get_write_access(inode); if (error) diff --git a/fs/statfs.c b/fs/statfs.c index 083dc0ac9140..13ae259d4879 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -63,7 +63,7 @@ static int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf) return retval; } -int vfs_statfs(struct path *path, struct kstatfs *buf) +int vfs_statfs(const struct path *path, struct kstatfs *buf) { int error; diff --git a/fs/utimes.c b/fs/utimes.c index 22307cdf7014..5fdb505e307c 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -48,7 +48,7 @@ static bool nsec_valid(long nsec) return nsec >= 0 && nsec <= 999999999; } -static int utimes_common(struct path *path, struct timespec *times) +static int utimes_common(const struct path *path, struct timespec *times) { int error; struct iattr newattrs; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3056fe46f336..0e177d395efb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2127,7 +2127,7 @@ extern struct vfsmount *collect_mounts(const struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, struct vfsmount *); -extern int vfs_statfs(struct path *, struct kstatfs *); +extern int vfs_statfs(const struct path *, struct 
kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); extern int vfs_ustat(dev_t, struct kstatfs *); -- cgit v1.2.3 From 876bec6f9bbfcb394916d17e35226b086c04dc45 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 9 Dec 2016 16:18:30 -0800 Subject: vfs: refactor clone/dedupe_file_range common functions Hoist both the XFS reflink inode state and preparation code and the XFS file blocks compare functions into the VFS so that ocfs2 can take advantage of it for reflink and dedupe. Signed-off-by: Darrick J. Wong --- fs/read_write.c | 204 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_reflink.c | 213 +++------------------------------------------------ include/linux/fs.h | 6 ++ 3 files changed, 219 insertions(+), 204 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/read_write.c b/fs/read_write.c index 6674a4b83c54..dbf3f7ffdf3f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1667,6 +1667,114 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) return security_file_permission(file, write ? MAY_WRITE : MAY_READ); } +/* + * Check that the two inodes are eligible for cloning, the ranges make + * sense, and then flush all dirty data. Caller must ensure that the + * inodes have been locked against any other modifications. + */ +int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, + struct inode *inode_out, loff_t pos_out, + u64 *len, bool is_dedupe) +{ + loff_t bs = inode_out->i_sb->s_blocksize; + loff_t blen; + loff_t isize; + bool same_inode = (inode_in == inode_out); + int ret; + + /* Don't touch certain kinds of inodes */ + if (IS_IMMUTABLE(inode_out)) + return -EPERM; + + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + return -ETXTBSY; + + /* Don't reflink dirs, pipes, sockets... 
*/ + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + + /* Are we going all the way to the end? */ + isize = i_size_read(inode_in); + if (isize == 0) { + *len = 0; + return 0; + } + + /* Zero length dedupe exits immediately; reflink goes to EOF. */ + if (*len == 0) { + if (is_dedupe) { + *len = 0; + return 0; + } + *len = isize - pos_in; + } + + /* Ensure offsets don't wrap and the input is inside i_size */ + if (pos_in + *len < pos_in || pos_out + *len < pos_out || + pos_in + *len > isize) + return -EINVAL; + + /* Don't allow dedupe past EOF in the dest file */ + if (is_dedupe) { + loff_t disize; + + disize = i_size_read(inode_out); + if (pos_out >= disize || pos_out + *len > disize) + return -EINVAL; + } + + /* If we're linking to EOF, continue to the block boundary. */ + if (pos_in + *len == isize) + blen = ALIGN(isize, bs) - pos_in; + else + blen = *len; + + /* Only reflink if we're aligned to block boundaries */ + if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || + !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) + return -EINVAL; + + /* Don't allow overlapped reflink within the same file */ + if (same_inode) { + if (pos_out + blen > pos_in && pos_out < pos_in + blen) + return -EINVAL; + } + + /* Wait for the completion of any pending IOs on both files */ + inode_dio_wait(inode_in); + if (!same_inode) + inode_dio_wait(inode_out); + + ret = filemap_write_and_wait_range(inode_in->i_mapping, + pos_in, pos_in + *len - 1); + if (ret) + return ret; + + ret = filemap_write_and_wait_range(inode_out->i_mapping, + pos_out, pos_out + *len - 1); + if (ret) + return ret; + + /* + * Check that the extents are the same. 
+ */ + if (is_dedupe) { + bool is_same = false; + + ret = vfs_dedupe_file_range_compare(inode_in, pos_in, + inode_out, pos_out, *len, &is_same); + if (ret) + return ret; + if (!is_same) + return -EBADE; + } + + return 0; +} +EXPORT_SYMBOL(vfs_clone_file_prep_inodes); + int vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len) { @@ -1718,6 +1826,102 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(vfs_clone_file_range); +/* + * Read a page's worth of file data into the page cache. Return the page + * locked. + */ +static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) +{ + struct address_space *mapping; + struct page *page; + pgoff_t n; + + n = offset >> PAGE_SHIFT; + mapping = inode->i_mapping; + page = read_mapping_page(mapping, n, NULL); + if (IS_ERR(page)) + return page; + if (!PageUptodate(page)) { + put_page(page); + return ERR_PTR(-EIO); + } + lock_page(page); + return page; +} + +/* + * Compare extents of two files to see if they are the same. + * Caller must have locked both inodes to prevent write races. 
+ */ +int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same) +{ + loff_t src_poff; + loff_t dest_poff; + void *src_addr; + void *dest_addr; + struct page *src_page; + struct page *dest_page; + loff_t cmp_len; + bool same; + int error; + + error = -EINVAL; + same = true; + while (len) { + src_poff = srcoff & (PAGE_SIZE - 1); + dest_poff = destoff & (PAGE_SIZE - 1); + cmp_len = min(PAGE_SIZE - src_poff, + PAGE_SIZE - dest_poff); + cmp_len = min(cmp_len, len); + if (cmp_len <= 0) + goto out_error; + + src_page = vfs_dedupe_get_page(src, srcoff); + if (IS_ERR(src_page)) { + error = PTR_ERR(src_page); + goto out_error; + } + dest_page = vfs_dedupe_get_page(dest, destoff); + if (IS_ERR(dest_page)) { + error = PTR_ERR(dest_page); + unlock_page(src_page); + put_page(src_page); + goto out_error; + } + src_addr = kmap_atomic(src_page); + dest_addr = kmap_atomic(dest_page); + + flush_dcache_page(src_page); + flush_dcache_page(dest_page); + + if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) + same = false; + + kunmap_atomic(dest_addr); + kunmap_atomic(src_addr); + unlock_page(dest_page); + unlock_page(src_page); + put_page(dest_page); + put_page(src_page); + + if (!same) + break; + + srcoff += cmp_len; + destoff += cmp_len; + len -= cmp_len; + } + + *is_same = same; + return 0; + +out_error: + return error; +} +EXPORT_SYMBOL(vfs_dedupe_file_range_compare); + int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) { struct file_dedupe_range_info *info; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index a279b4e7f5fe..95d6828967f0 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1164,111 +1164,6 @@ err: return error; } -/* - * Read a page's worth of file data into the page cache. Return the page - * locked. 
- */ -static struct page * -xfs_get_page( - struct inode *inode, - xfs_off_t offset) -{ - struct address_space *mapping; - struct page *page; - pgoff_t n; - - n = offset >> PAGE_SHIFT; - mapping = inode->i_mapping; - page = read_mapping_page(mapping, n, NULL); - if (IS_ERR(page)) - return page; - if (!PageUptodate(page)) { - put_page(page); - return ERR_PTR(-EIO); - } - lock_page(page); - return page; -} - -/* - * Compare extents of two files to see if they are the same. - */ -static int -xfs_compare_extents( - struct inode *src, - xfs_off_t srcoff, - struct inode *dest, - xfs_off_t destoff, - xfs_off_t len, - bool *is_same) -{ - xfs_off_t src_poff; - xfs_off_t dest_poff; - void *src_addr; - void *dest_addr; - struct page *src_page; - struct page *dest_page; - xfs_off_t cmp_len; - bool same; - int error; - - error = -EINVAL; - same = true; - while (len) { - src_poff = srcoff & (PAGE_SIZE - 1); - dest_poff = destoff & (PAGE_SIZE - 1); - cmp_len = min(PAGE_SIZE - src_poff, - PAGE_SIZE - dest_poff); - cmp_len = min(cmp_len, len); - ASSERT(cmp_len > 0); - - trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len, - XFS_I(dest), destoff); - - src_page = xfs_get_page(src, srcoff); - if (IS_ERR(src_page)) { - error = PTR_ERR(src_page); - goto out_error; - } - dest_page = xfs_get_page(dest, destoff); - if (IS_ERR(dest_page)) { - error = PTR_ERR(dest_page); - unlock_page(src_page); - put_page(src_page); - goto out_error; - } - src_addr = kmap_atomic(src_page); - dest_addr = kmap_atomic(dest_page); - - flush_dcache_page(src_page); - flush_dcache_page(dest_page); - - if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) - same = false; - - kunmap_atomic(dest_addr); - kunmap_atomic(src_addr); - unlock_page(dest_page); - unlock_page(src_page); - put_page(dest_page); - put_page(src_page); - - if (!same) - break; - - srcoff += cmp_len; - destoff += cmp_len; - len -= cmp_len; - } - - *is_same = same; - return 0; - -out_error: - 
trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_); - return error; -} - /* * Link a range of blocks from one file to another. */ @@ -1286,14 +1181,11 @@ xfs_reflink_remap_range( struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); struct xfs_mount *mp = src->i_mount; - loff_t bs = inode_out->i_sb->s_blocksize; bool same_inode = (inode_in == inode_out); xfs_fileoff_t sfsbno, dfsbno; xfs_filblks_t fsblen; xfs_extlen_t cowextsize; - loff_t isize; ssize_t ret; - loff_t blen; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return -EOPNOTSUPP; @@ -1310,26 +1202,8 @@ xfs_reflink_remap_range( xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); } - /* Don't touch certain kinds of inodes */ - ret = -EPERM; - if (IS_IMMUTABLE(inode_out)) - goto out_unlock; - - ret = -ETXTBSY; - if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) - goto out_unlock; - - - /* Don't reflink dirs, pipes, sockets... */ - ret = -EISDIR; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - goto out_unlock; + /* Check file eligibility and prepare for block sharing. */ ret = -EINVAL; - if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) - goto out_unlock; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - goto out_unlock; - /* Don't reflink realtime inodes */ if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) goto out_unlock; @@ -1338,91 +1212,18 @@ xfs_reflink_remap_range( if (IS_DAX(inode_in) || IS_DAX(inode_out)) goto out_unlock; - /* Are we going all the way to the end? 
*/ - isize = i_size_read(inode_in); - if (isize == 0) { - ret = 0; - goto out_unlock; - } - - if (len == 0) - len = isize - pos_in; - - /* Ensure offsets don't wrap and the input is inside i_size */ - if (pos_in + len < pos_in || pos_out + len < pos_out || - pos_in + len > isize) - goto out_unlock; - - /* Don't allow dedupe past EOF in the dest file */ - if (is_dedupe) { - loff_t disize; - - disize = i_size_read(inode_out); - if (pos_out >= disize || pos_out + len > disize) - goto out_unlock; - } - - /* If we're linking to EOF, continue to the block boundary. */ - if (pos_in + len == isize) - blen = ALIGN(isize, bs) - pos_in; - else - blen = len; - - /* Only reflink if we're aligned to block boundaries */ - if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || - !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) - goto out_unlock; - - /* Don't allow overlapped reflink within the same file */ - if (same_inode) { - if (pos_out + blen > pos_in && pos_out < pos_in + blen) - goto out_unlock; - } - - /* Wait for the completion of any pending IOs on both files */ - inode_dio_wait(inode_in); - if (!same_inode) - inode_dio_wait(inode_out); - - ret = filemap_write_and_wait_range(inode_in->i_mapping, - pos_in, pos_in + len - 1); - if (ret) - goto out_unlock; - - ret = filemap_write_and_wait_range(inode_out->i_mapping, - pos_out, pos_out + len - 1); - if (ret) + ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, + &len, is_dedupe); + if (ret || len == 0) goto out_unlock; trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); - /* - * Check that the extents are the same. - */ - if (is_dedupe) { - bool is_same = false; - - ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out, - len, &is_same); - if (ret) - goto out_unlock; - if (!is_same) { - ret = -EBADE; - goto out_unlock; - } - } - + /* Set flags and remap blocks. 
*/ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) goto out_unlock; - /* - * Invalidate the page cache so that we can clear any CoW mappings - * in the destination file. - */ - truncate_inode_pages_range(&inode_out->i_data, pos_out, - PAGE_ALIGN(pos_out + len) - 1); - dfsbno = XFS_B_TO_FSBT(mp, pos_out); sfsbno = XFS_B_TO_FSBT(mp, pos_in); fsblen = XFS_B_TO_FSB(mp, len); @@ -1431,6 +1232,10 @@ xfs_reflink_remap_range( if (ret) goto out_unlock; + /* Zap any page cache for the destination file's range. */ + truncate_inode_pages_range(&inode_out->i_data, pos_out, + PAGE_ALIGN(pos_out + len) - 1); + /* * Carry the cowextsize hint from src to dest if we're sharing the * entire source file to the entire destination file, the source file diff --git a/include/linux/fs.h b/include/linux/fs.h index dc0478c07b2a..caea736fa09c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1778,8 +1778,14 @@ extern ssize_t vfs_writev(struct file *, const struct iovec __user *, unsigned long, loff_t *, int); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); +extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, + struct inode *inode_out, loff_t pos_out, + u64 *len, bool is_dedupe); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); +extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -- cgit v1.2.3