From 29f3ad7d8380364c86556eedf4eedd3b3d4921dc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Nov 2016 18:08:11 +0100 Subject: fs: Provide function to unmap metadata for a range of blocks Provide function equivalent to unmap_underlying_metadata() for a range of blocks. We somewhat optimize the function to use pagevec lookups instead of looking up buffer heads one by one and use page lock to pin buffer heads instead of mapping's private_lock to improve scalability. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/buffer_head.h') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ebbacd14d450..9c9c73ce7d4f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -169,6 +169,8 @@ void invalidate_inode_buffers(struct inode *); int remove_inode_buffers(struct inode *inode); int sync_mapping_buffers(struct address_space *mapping); void unmap_underlying_metadata(struct block_device *bdev, sector_t block); +void clean_bdev_aliases(struct block_device *bdev, sector_t block, + sector_t len); void mark_buffer_async_write(struct buffer_head *bh); void __wait_on_buffer(struct buffer_head *); -- cgit v1.2.3 From e64855c6cfaa0a80c1b71c5f647cb792dc436668 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Nov 2016 18:08:15 +0100 Subject: fs: Add helper to clean bdev aliases under a bh and use it Add a helper function that clears buffer heads from a block device aliasing passed bh. Use this helper function from filesystems instead of the original unmap_underlying_metadata() to save some boiler plate code and also have a better name for the functionalily since it is not unmapping anything for a *long* time. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/buffer.c | 8 +++----- fs/ext4/inode.c | 3 +-- fs/ext4/page-io.c | 2 +- fs/mpage.c | 3 +-- fs/ntfs/aops.c | 2 +- fs/ntfs/file.c | 5 ++--- fs/ocfs2/aops.c | 2 +- fs/ufs/balloc.c | 3 +-- fs/ufs/inode.c | 3 +-- include/linux/buffer_head.h | 4 ++++ 10 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/buffer.c b/fs/buffer.c index f8beca55240a..912d70169fca 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1821,8 +1821,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, if (buffer_new(bh)) { /* blockdev mappings never come here */ clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); } } bh = bh->b_this_page; @@ -2068,8 +2067,7 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, } if (buffer_new(bh)) { - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); if (PageUptodate(page)) { clear_buffer_new(bh); set_buffer_uptodate(bh); @@ -2709,7 +2707,7 @@ int nobh_write_begin(struct address_space *mapping, if (!buffer_mapped(bh)) is_mapped_to_disk = 0; if (buffer_new(bh)) - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + clean_bdev_bh_alias(bh); if (PageUptodate(page)) { set_buffer_uptodate(bh); continue; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7c7cc4ae4b8e..2f8127601bef 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1123,8 +1123,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (err) break; if (buffer_new(bh)) { - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); if (PageUptodate(page)) { clear_buffer_new(bh); set_buffer_uptodate(bh); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index e0b3b54cdef3..f28fd6483e04 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -457,7 +457,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, } if (buffer_new(bh)) { clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + clean_bdev_bh_alias(bh); } set_buffer_async_write(bh); nr_to_submit++; diff --git a/fs/mpage.c b/fs/mpage.c index 98fc11aa7e0b..28af984a3d96 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -555,8 +555,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, if (mpd->get_block(inode, block_in_file, &map_bh, 1)) goto confused; if (buffer_new(&map_bh)) - unmap_underlying_metadata(map_bh.b_bdev, - map_bh.b_blocknr); + clean_bdev_bh_alias(&map_bh); if (buffer_boundary(&map_bh)) { boundary_block = map_bh.b_blocknr; boundary_bdev = map_bh.b_bdev; diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index d0cf6fee5c77..cc91856b5e2d 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -765,7 +765,7 @@ lock_retry_remap: } // TODO: Instantiate the hole. // clear_buffer_new(bh); - // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + // clean_bdev_bh_alias(bh); ntfs_error(vol->sb, "Writing into sparse regions is " "not supported yet. Sorry."); err = -EOPNOTSUPP; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index bf72a2c58b75..99510d811a8c 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -740,8 +740,7 @@ map_buffer_cached: set_buffer_uptodate(bh); if (unlikely(was_hole)) { /* We allocated the buffer. */ - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); if (bh_end <= pos || bh_pos >= end) mark_buffer_dirty(bh); else @@ -784,7 +783,7 @@ map_buffer_cached: continue; } /* We allocated the buffer. */ - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + clean_bdev_bh_alias(bh); /* * If the buffer is fully outside the write, zero it, * set it uptodate, and mark it dirty so it gets diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c5c5b9748ea3..e8f65eefffca 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -630,7 +630,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, if (!buffer_mapped(bh)) { map_bh(bh, inode->i_sb, *p_blkno); - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + clean_bdev_bh_alias(bh); } if (PageUptodate(page)) { diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index b035af54f538..a0376a2c1c29 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -307,8 +307,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg, (unsigned long long)(pos + newb), pos); bh->b_blocknr = newb + pos; - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); mark_buffer_dirty(bh); ++j; bh = bh->b_this_page; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 190d64be22ed..45ceb94e89e4 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1070,8 +1070,7 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size) if (buffer_new(bh)) { clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); /* * we do not zeroize fragment, because of * if it maped to hole, it already contains zeroes diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9c9c73ce7d4f..d1ab91fc6d43 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -171,6 +171,10 @@ int sync_mapping_buffers(struct address_space *mapping); void unmap_underlying_metadata(struct block_device *bdev, sector_t block); void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len); +static inline void clean_bdev_bh_alias(struct buffer_head *bh) +{ + clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1); +} void mark_buffer_async_write(struct buffer_head *bh); void __wait_on_buffer(struct buffer_head *); -- cgit v1.2.3 From ce98321bf7d274a470642ef99e1d82512673ce7c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Nov 2016 18:08:16 +0100 Subject: fs: Remove unmap_underlying_metadata Nobody is using this function anymore. Remove it. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/buffer.c | 32 -------------------------------- include/linux/buffer_head.h | 1 - 2 files changed, 33 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/buffer.c b/fs/buffer.c index 912d70169fca..1104ce8b4536 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1605,38 +1605,6 @@ void create_empty_buffers(struct page *page, } EXPORT_SYMBOL(create_empty_buffers); -/* - * We are taking a block for data and we don't want any output from any - * buffer-cache aliases starting from return from that function and - * until the moment when something will explicitly mark the buffer - * dirty (hopefully that will not happen until we will free that block ;-) - * We don't even need to mark it not-uptodate - nobody can expect - * anything from a newly allocated buffer anyway. We used to used - * unmap_buffer() for such invalidation, but that was wrong. We definitely - * don't want to mark the alias unmapped, for example - it would confuse - * anyone who might pick it with bread() afterwards... - * - * Also.. Note that bforget() doesn't lock the buffer. So there can - * be writeout I/O going on against recently-freed buffers. We don't - * wait on that I/O in bforget() - it's more efficient to wait on the I/O - * only if we really need to. That happens here. - */ -void unmap_underlying_metadata(struct block_device *bdev, sector_t block) -{ - struct buffer_head *old_bh; - - might_sleep(); - - old_bh = __find_get_block_slow(bdev, block); - if (old_bh) { - clear_buffer_dirty(old_bh); - wait_on_buffer(old_bh); - clear_buffer_req(old_bh); - __brelse(old_bh); - } -} -EXPORT_SYMBOL(unmap_underlying_metadata); - /** * clean_bdev_aliases: clean a range of buffers in block device * @bdev: Block device to clean buffers in diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d1ab91fc6d43..d67ab83823ad 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -168,7 +168,6 @@ int inode_has_buffers(struct inode *); void invalidate_inode_buffers(struct inode *); int remove_inode_buffers(struct inode *inode); int sync_mapping_buffers(struct address_space *mapping); -void unmap_underlying_metadata(struct block_device *bdev, sector_t block); void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len); static inline void clean_bdev_bh_alias(struct buffer_head *bh) -- cgit v1.2.3 From 0911d0041c22922228ca52a977d7b0b0159fee4b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Feb 2017 14:30:53 -0800 Subject: mm: avoid returning VM_FAULT_RETRY from ->page_mkwrite handlers Some ->page_mkwrite handlers may return VM_FAULT_RETRY as its return code (GFS2 or Lustre can definitely do this). However VM_FAULT_RETRY from ->page_mkwrite is completely unhandled by the mm code and results in locking and writeably mapping the page which definitely is not what the caller wanted. Fix Lustre and block_page_mkwrite_ret() used by other filesystems (notably GFS2) to return VM_FAULT_NOPAGE instead which results in bailing out from the fault code, the CPU then retries the access, and we fault again effectively doing what the handler wanted. Link: http://lkml.kernel.org/r/20170203150729.15863-1-jack@suse.cz Signed-off-by: Jan Kara Reported-by: Al Viro Reviewed-by: Jinshan Xiong Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/lustre/lustre/llite/llite_mmap.c | 4 +--- include/linux/buffer_head.h | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index ee01f20d8b11..9afa6bec3e6f 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -390,15 +390,13 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) result = VM_FAULT_LOCKED; break; case -ENODATA: + case -EAGAIN: case -EFAULT: result = VM_FAULT_NOPAGE; break; case -ENOMEM: result = VM_FAULT_OOM; break; - case -EAGAIN: - result = VM_FAULT_RETRY; - break; default: result = VM_FAULT_SIGBUS; break; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d67ab83823ad..79591c3660cc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -243,12 +243,10 @@ static inline int block_page_mkwrite_return(int err) { if (err == 0) return VM_FAULT_LOCKED; - if (err == -EFAULT) + if (err == -EFAULT || err == -EAGAIN) return VM_FAULT_NOPAGE; if (err == -ENOMEM) return VM_FAULT_OOM; - if (err == -EAGAIN) - return VM_FAULT_RETRY; /* -ENOSPC, -EDQUOT, -EIO ... */ return VM_FAULT_SIGBUS; } -- cgit v1.2.3