From 24addd848a45747bcda68418710c72fdc8e145e4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 21 Sep 2020 08:58:39 -0700 Subject: fs: Introduce i_blocks_per_page This helper is useful for both THPs and for supporting block size larger than page size. Convert all users that I could find (we have a few different ways of writing this idiom, and I may have missed some). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Acked-by: Dave Kleikamp --- include/linux/pagemap.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7de11dcd534d..853733286138 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -899,4 +899,20 @@ static inline int page_mkwrite_check_truncate(struct page *page, return offset; } +/** + * i_blocks_per_page - How many blocks fit in this page. + * @inode: The inode which contains the blocks. + * @page: The page (head page if the page is a THP). + * + * If the block size is larger than the size of this page, return zero. + * + * Context: The caller should hold a refcount on the page to prevent it + * from being split. + * Return: The number of filesystem blocks covered by this page. + */ +static inline +unsigned int i_blocks_per_page(struct inode *inode, struct page *page) +{ + return thp_size(page) >> inode->i_blkbits; +} #endif /* _LINUX_PAGEMAP_H */ -- cgit v1.2.3 From 81ee8e52a71c712dbe04994f1430cb4c88b87ad6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 21 Sep 2020 08:58:42 -0700 Subject: iomap: Change calling convention for zeroing Pass the full length to iomap_zero() and dax_iomap_zero(), and have them return how many bytes they actually handled. This is preparatory work for handling THP, although it looks like DAX could actually take advantage of it if there's a larger contiguous area. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/dax.c | 13 ++++++------- fs/iomap/buffered-io.c | 33 +++++++++++++++------------------ include/linux/dax.h | 3 +-- 3 files changed, 22 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/dax.c b/fs/dax.c index 994ab66a9907..6ad346352a8c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1037,18 +1037,18 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, return ret; } -int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size, - struct iomap *iomap) +s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap) { sector_t sector = iomap_sector(iomap, pos & PAGE_MASK); pgoff_t pgoff; long rc, id; void *kaddr; bool page_aligned = false; - + unsigned offset = offset_in_page(pos); + unsigned size = min_t(u64, PAGE_SIZE - offset, length); if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) && - IS_ALIGNED(size, PAGE_SIZE)) + (size == PAGE_SIZE)) page_aligned = true; rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff); @@ -1058,8 +1058,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size, id = dax_read_lock(); if (page_aligned) - rc = dax_zero_page_range(iomap->dax_dev, pgoff, - size >> PAGE_SHIFT); + rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1); else rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL); if (rc < 0) { @@ -1072,7 +1071,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size, dax_flush(iomap->dax_dev, kaddr + offset, size); } dax_read_unlock(id); - return 0; + return size; } static loff_t diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index f26b24f39de4..8b6cca7e34e4 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -898,11 +898,13 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, } EXPORT_SYMBOL_GPL(iomap_file_unshare); -static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, - unsigned bytes, struct iomap *iomap, struct iomap *srcmap) +static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length, + struct iomap *iomap, struct iomap *srcmap) { struct page *page; int status; + unsigned offset = offset_in_page(pos); + unsigned bytes = min_t(u64, PAGE_SIZE - offset, length); status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap); if (status) @@ -914,38 +916,33 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap); } -static loff_t -iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, - void *data, struct iomap *iomap, struct iomap *srcmap) +static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos, + loff_t length, void *data, struct iomap *iomap, + struct iomap *srcmap) { bool *did_zero = data; loff_t written = 0; - int status; /* already zeroed? we're done. */ if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) - return count; + return length; do { - unsigned offset, bytes; - - offset = offset_in_page(pos); - bytes = min_t(loff_t, PAGE_SIZE - offset, count); + s64 bytes; if (IS_DAX(inode)) - status = dax_iomap_zero(pos, offset, bytes, iomap); + bytes = dax_iomap_zero(pos, length, iomap); else - status = iomap_zero(inode, pos, offset, bytes, iomap, - srcmap); - if (status < 0) - return status; + bytes = iomap_zero(inode, pos, length, iomap, srcmap); + if (bytes < 0) + return bytes; pos += bytes; - count -= bytes; + length -= bytes; written += bytes; if (did_zero) *did_zero = true; - } while (count > 0); + } while (length > 0); return written; } diff --git a/include/linux/dax.h b/include/linux/dax.h index 6904d4e0b2e0..951a851a0481 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -214,8 +214,7 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); -int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size, - struct iomap *iomap); +s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); -- cgit v1.2.3 From c3d4ed1abecfcfc801199cfadb71f5b80e025d9e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 08:51:08 -0700 Subject: iomap: Allow filesystem to call iomap_dio_complete without i_rwsem This is to avoid the deadlock caused in btrfs because of O_DIRECT | O_DSYNC. Filesystems such as btrfs require i_rwsem while performing sync on a file. iomap_dio_rw() is called under i_rw_sem. This leads to a deadlock because of: iomap_dio_complete() generic_write_sync() btrfs_sync_file() Separate out iomap_dio_complete() from iomap_dio_rw(), so filesystems can call iomap_dio_complete() after unlocking i_rwsem. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Signed-off-by: Goldwyn Rodrigues Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap/direct-io.c | 35 ++++++++++++++++++++++++++--------- include/linux/iomap.h | 5 +++++ 2 files changed, 31 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 024d4bb3028e..b005ef5f554e 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -76,7 +76,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap, dio->submit.cookie = submit_bio(bio); } -static ssize_t iomap_dio_complete(struct iomap_dio *dio) +ssize_t iomap_dio_complete(struct iomap_dio *dio) { const struct iomap_dio_ops *dops = dio->dops; struct kiocb *iocb = dio->iocb; @@ -130,6 +130,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) return ret; } +EXPORT_SYMBOL_GPL(iomap_dio_complete); static void iomap_dio_complete_work(struct work_struct *work) { @@ -416,8 +417,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, * Returns -ENOTBLK In case of a page invalidation invalidation failure for * writes. The callers needs to fall back to buffered I/O in this case. */ -ssize_t -iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, +struct iomap_dio * +__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, bool wait_for_completion) { @@ -431,14 +432,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_dio *dio; if (!count) - return 0; + return NULL; if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion)) - return -EIO; + return ERR_PTR(-EIO); dio = kmalloc(sizeof(*dio), GFP_KERNEL); if (!dio) - return -ENOMEM; + return ERR_PTR(-ENOMEM); dio->iocb = iocb; atomic_set(&dio->ref, 1); @@ -568,7 +569,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->wait_for_completion = wait_for_completion; if (!atomic_dec_and_test(&dio->ref)) { if (!wait_for_completion) - return -EIOCBQUEUED; + return ERR_PTR(-EIOCBQUEUED); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -584,10 +585,26 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __set_current_state(TASK_RUNNING); } - return iomap_dio_complete(dio); + return dio; out_free_dio: kfree(dio); - return ret; + if (ret) + return ERR_PTR(ret); + return NULL; +} +EXPORT_SYMBOL_GPL(__iomap_dio_rw); + +ssize_t +iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, + bool wait_for_completion) +{ + struct iomap_dio *dio; + + dio = __iomap_dio_rw(iocb, iter, ops, dops, wait_for_completion); + if (IS_ERR_OR_NULL(dio)) + return PTR_ERR_OR_ZERO(dio); + return iomap_dio_complete(dio); } EXPORT_SYMBOL_GPL(iomap_dio_rw); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 4d1d3c3469e9..172b3397a1a3 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -13,6 +13,7 @@ struct address_space; struct fiemap_extent_info; struct inode; +struct iomap_dio; struct iomap_writepage_ctx; struct iov_iter; struct kiocb; @@ -258,6 +259,10 @@ struct iomap_dio_ops { ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, bool wait_for_completion); +struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, + bool wait_for_completion); +ssize_t iomap_dio_complete(struct iomap_dio *dio); int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); #ifdef CONFIG_SWAP -- cgit v1.2.3