From a1548b674403c0de70cc29a1575689917ba60157 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2019 15:34:34 +0100 Subject: block: move rescan_partitions to fs/block_dev.c Large parts of rescan_partitions aren't about partitions, and moving it to block_dev.c will allow for some further cleanups by merging it into its only caller. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index e0d909d35763..d233dd661df7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2703,8 +2703,6 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK -extern void check_disk_size_change(struct gendisk *disk, - struct block_device *bdev, bool verbose); extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *, bool); -- cgit v1.2.3 From f0b870df80bc70dad432fd0c142bb709a49964f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2019 15:34:36 +0100 Subject: block: remove (__)blkdev_reread_part as an exported API In general drivers should never mess with partition tables directly. Unfortunately s390 and loop do for somewhat historic reasons, but they can use bdev_disk_changed directly instead when we export it as they satisfy the sanity checks we have in __blkdev_reread_part. Signed-off-by: Christoph Hellwig Reviewed-by: Stefan Haberland [dasd] Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- block/ioctl.c | 35 +++++------------------------------ drivers/block/loop.c | 13 ++++++++----- drivers/s390/block/dasd_genhd.c | 4 +++- fs/block_dev.c | 7 +++++++ include/linux/fs.h | 2 -- 5 files changed, 23 insertions(+), 38 deletions(-) (limited to 'include/linux/fs.h') diff --git a/block/ioctl.c b/block/ioctl.c index 5ccd9f016594..7ac8a66c9787 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -155,46 +155,21 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user } } -/* - * This is an exported API for the block driver, and will not - * acquire bd_mutex. This API should be used in case that - * caller has held bd_mutex already. - */ -int __blkdev_reread_part(struct block_device *bdev) +static int blkdev_reread_part(struct block_device *bdev) { + int ret; + if (!disk_part_scan_enabled(bdev->bd_disk) || bdev != bdev->bd_contains) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - lockdep_assert_held(&bdev->bd_mutex); - - return bdev_disk_changed(bdev, false); -} -EXPORT_SYMBOL(__blkdev_reread_part); - -/* - * This is an exported API for the block driver, and will - * try to acquire bd_mutex. If bd_mutex has been held already - * in current context, please call __blkdev_reread_part(). - * - * Make sure the held locks in current context aren't required - * in open()/close() handler and I/O path for avoiding ABBA deadlock: - * - bd_mutex is held before calling block driver's open/close - * handler - * - reading partition table may submit I/O to the block device - */ -int blkdev_reread_part(struct block_device *bdev) -{ - int res; - mutex_lock(&bdev->bd_mutex); - res = __blkdev_reread_part(bdev); + ret = bdev_disk_changed(bdev, false); mutex_unlock(&bdev->bd_mutex); - return res; + return ret; } -EXPORT_SYMBOL(blkdev_reread_part); static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, unsigned long arg, unsigned long flags) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ef6e251857c8..739b372a5112 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -640,7 +640,9 @@ static void loop_reread_partitions(struct loop_device *lo, { int rc; - rc = blkdev_reread_part(bdev); + mutex_lock(&bdev->bd_mutex); + rc = bdev_disk_changed(bdev, false); + mutex_unlock(&bdev->bd_mutex); if (rc) pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n", __func__, lo->lo_number, lo->lo_file_name, rc); @@ -1164,10 +1166,11 @@ out_unlock: * must be at least one and it can only become zero when the * current holder is released. */ - if (release) - err = __blkdev_reread_part(bdev); - else - err = blkdev_reread_part(bdev); + if (!release) + mutex_lock(&bdev->bd_mutex); + err = bdev_disk_changed(bdev, false); + if (!release) + mutex_unlock(&bdev->bd_mutex); if (err) pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", __func__, lo_number, err); diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 5542d9eadfe0..7d079154f849 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -116,7 +116,9 @@ int dasd_scan_partitions(struct dasd_block *block) return -ENODEV; } - rc = blkdev_reread_part(bdev); + mutex_lock(&bdev->bd_mutex); + rc = bdev_disk_changed(bdev, false); + mutex_unlock(&bdev->bd_mutex); if (rc) DBF_DEV_EVENT(DBF_ERR, block->base, "scan partitions error, rc %d", rc); diff --git a/fs/block_dev.c b/fs/block_dev.c index ae16466a67f7..9558a2f064b1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1513,6 +1513,8 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) struct gendisk *disk = bdev->bd_disk; int ret; + lockdep_assert_held(&bdev->bd_mutex); + rescan: ret = blk_drop_partitions(disk, bdev); if (ret) @@ -1540,6 +1542,11 @@ rescan: return ret; } +/* + * Only exported for for loop and dasd for historic reasons. Don't use in new + * code! + */ +EXPORT_SYMBOL_GPL(bdev_disk_changed); /* * bd_mutex locking: diff --git a/include/linux/fs.h b/include/linux/fs.h index d233dd661df7..ae6c5c37f3ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2632,8 +2632,6 @@ extern void bd_finish_claiming(struct block_device *bdev, extern void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void *holder); extern void blkdev_put(struct block_device *bdev, fmode_t mode); -extern int __blkdev_reread_part(struct block_device *bdev); -extern int blkdev_reread_part(struct block_device *bdev); #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -- cgit v1.2.3 From 0be0ee71816b2b6725e2b4f32ad6726c9d729777 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 11 Nov 2019 15:51:03 -0800 Subject: vfs: properly and reliably lock f_pos in fdget_pos() fdget_pos() is used by file operations that will read and update f_pos: things like "read()", "write()" and "lseek()" (but not, for example, "pread()/pwrite" that get their file positions elsewhere). However, it had two separate escape clauses for this, because not everybody wants or needs serialization of the file position. The first and most obvious case is the "file descriptor doesn't have a position at all", ie a stream-like file. Except we didn't actually use FMODE_STREAM, but instead used FMODE_ATOMIC_POS. The reason for that was that FMODE_STREAM didn't exist back in the days, but also that we didn't want to mark all the special cases, so we only marked the ones that _required_ position atomicity according to POSIX - regular files and directories. The case one was intentionally lazy, but now that we _do_ have FMODE_STREAM we could and should just use it. With the change to use FMODE_STREAM, there are no remaining uses for FMODE_ATOMIC_POS, and all the code to set it is deleted. Any cases where we don't want the serialization because the driver (or subsystem) doesn't use the file position should just be updated to do "stream_open()". We've done that for all the obvious and common situations, we may need a few more. Quoting Kirill Smelkov in the original FMODE_STREAM thread (see link below for full email): "And I appreciate if people could help at least somehow with "getting rid of mixed case entirely" (i.e. always lock f_pos_lock on !FMODE_STREAM), because this transition starts to diverge from my particular use-case too far. To me it makes sense to do that transition as follows: - convert nonseekable_open -> stream_open via stream_open.cocci; - audit other nonseekable_open calls and convert left users that truly don't depend on position to stream_open; - extend stream_open.cocci to analyze alloc_file_pseudo as well (this will cover pipes and sockets), or maybe convert pipes and sockets to FMODE_STREAM manually; - extend stream_open.cocci to analyze file_operations that use no_llseek or noop_llseek, but do not use nonseekable_open or alloc_file_pseudo. This might find files that have stream semantic but are opened differently; - extend stream_open.cocci to analyze file_operations whose .read/.write do not use ppos at all (independently of how file was opened); - ... - after that remove FMODE_ATOMIC_POS and always take f_pos_lock if !FMODE_STREAM; - gather bug reports for deadlocked read/write and convert missed cases to FMODE_STREAM, probably extending stream_open.cocci along the road to catch similar cases i.e. always take f_pos_lock unless a file is explicitly marked as being stream, and try to find and cover all files that are streams" We have not done the "extend stream_open.cocci to analyze alloc_file_pseudo" as well, but the previous commit did manually handle the case of pipes and sockets. The other case where we can avoid locking f_pos is the "this file descriptor only has a single user and it is us, and thus there is no need to lock it". The second test was correct, although a bit subtle and worth just re-iterating here. There are two kinds of other sources of references to the same file descriptor: file descriptors that have been explicitly shared across fork() or with dup(), and file tables having elevated reference counts due to threading (or explicit file sharing with clone()). The first case would have incremented the file count explicitly, and in the second case the previous __fdget() would have incremented it for us and set the FDPUT_FPUT flag. But in both cases the file count would be greater than one, so the "file_count(file) > 1" test catches both situations. Also note that if file_count is 1, that also means that no other thread can have access to the file table, so there also cannot be races with concurrent calls to dup()/fork()/clone() that would increment the file count any other way. Link: https://lore.kernel.org/linux-fsdevel/20190413184404.GA13490@deco.navytux.spb.ru Cc: Kirill Smelkov Cc: Eic Dumazet Cc: Al Viro Cc: Alan Stern Cc: Marco Elver Cc: Andrea Parri Cc: Paul McKenney Signed-off-by: Linus Torvalds --- fs/file.c | 2 +- fs/open.c | 6 +----- include/linux/fs.h | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/file.c b/fs/file.c index 3da91a112bab..b241ea7f1aa4 100644 --- a/fs/file.c +++ b/fs/file.c @@ -795,7 +795,7 @@ unsigned long __fdget_pos(unsigned int fd) unsigned long v = __fdget(fd); struct file *file = (struct file *)(v & ~3); - if (file && (file->f_mode & FMODE_ATOMIC_POS)) { + if (file && !(file->f_mode & FMODE_STREAM)) { if (file_count(file) > 1) { v |= FDPUT_POS_UNLOCK; mutex_lock(&file->f_pos_lock); diff --git a/fs/open.c b/fs/open.c index b62f5c0923a8..5c68282ea79e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -771,10 +771,6 @@ static int do_dentry_open(struct file *f, f->f_mode |= FMODE_WRITER; } - /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) - f->f_mode |= FMODE_ATOMIC_POS; - f->f_op = fops_get(inode->i_fop); if (WARN_ON(!f->f_op)) { error = -ENODEV; @@ -1256,7 +1252,7 @@ EXPORT_SYMBOL(nonseekable_open); */ int stream_open(struct inode *inode, struct file *filp) { - filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS); + filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); filp->f_mode |= FMODE_STREAM; return 0; } diff --git a/include/linux/fs.h b/include/linux/fs.h index e0d909d35763..a7c3f6dd5701 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -148,8 +148,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)0x4000) -/* File needs atomic accesses to f_pos */ -#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) /* Write access to underlying fs */ #define FMODE_WRITER ((__force fmode_t)0x10000) /* Has read method(s) */ -- cgit v1.2.3