From e00adcadf3af7a8335026d71ab9f0e0a922191ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2020 11:00:11 +0100 Subject: block: add a new set_read_only method Add a new method to allow for driver-specific processing when setting or clearing the block device read-only state. This allows to replace the cumbersome and error-prone override of the whole ioctl implementation. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 639cae2c158b..5c1ba8a8d2bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1850,6 +1850,7 @@ struct block_device_operations { void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); + int (*set_read_only)(struct block_device *bdev, bool ro); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, -- cgit v1.2.3 From 98f49b63e84d4ee1a5c327d0b5f4e8699f6c70fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2020 11:00:17 +0100 Subject: block: remove set_device_ro Fold set_device_ro into its only remaining caller. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 7 ------- block/ioctl.c | 2 +- include/linux/genhd.h | 1 - 3 files changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 9387f050c248..b85db1f2233c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1846,13 +1846,6 @@ static void set_disk_ro_uevent(struct gendisk *gd, int ro) kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); } -void set_device_ro(struct block_device *bdev, int flag) -{ - bdev->bd_part->policy = flag; -} - -EXPORT_SYMBOL(set_device_ro); - void set_disk_ro(struct gendisk *disk, int flag) { struct disk_part_iter piter; diff --git a/block/ioctl.c b/block/ioctl.c index 96cb45447364..04255dc5f3bf 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -371,7 +371,7 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, if (ret) return ret; } - set_device_ro(bdev, n); + bdev->bd_part->policy = n; return 0; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 03da3f603d30..52eb592c874a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -304,7 +304,6 @@ extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); -extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) -- cgit v1.2.3 From a7cb3d2f09c8405aed59d97a7d02cebea43cd3c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2020 11:00:18 +0100 Subject: block: remove __blkdev_driver_ioctl Just open code it in the few callers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/ioctl.c | 25 +++++-------------------- drivers/block/pktcdvd.c | 6 ++++-- drivers/md/bcache/request.c | 5 +++-- drivers/md/dm.c | 5 ++++- include/linux/blkdev.h | 2 -- 5 files changed, 16 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/block/ioctl.c b/block/ioctl.c index 04255dc5f3bf..6b785181344f 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -219,23 +219,6 @@ static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) } #endif -int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, - unsigned cmd, unsigned long arg) -{ - struct gendisk *disk = bdev->bd_disk; - - if (disk->fops->ioctl) - return disk->fops->ioctl(bdev, mode, cmd, arg); - - return -ENOTTY; -} -/* - * For the record: _GPL here is only because somebody decided to slap it - * on the previous export. Sheer idiocy, since it wasn't copyrightable - * at all and could be open-coded without any exports by anybody who cares. - */ -EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); - #ifdef CONFIG_COMPAT /* * This is the equivalent of compat_ptr_ioctl(), to be used by block @@ -594,10 +577,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, } ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); - if (ret == -ENOIOCTLCMD) - return __blkdev_driver_ioctl(bdev, mode, cmd, arg); + if (ret != -ENOIOCTLCMD) + return ret; - return ret; + if (!bdev->bd_disk->fops->ioctl) + return -ENOTTY; + return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); } EXPORT_SYMBOL_GPL(blkdev_ioctl); /* for /dev/raw */ diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 467dbd06b7cd..ef1c1f094ea4 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2584,9 +2584,11 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, case CDROM_LAST_WRITTEN: case CDROM_SEND_PACKET: case SCSI_IOCTL_SEND_COMMAND: - ret = __blkdev_driver_ioctl(pd->bdev, mode, cmd, arg); + if (!bdev->bd_disk->fops->ioctl) + ret = -ENOTTY; + else + ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); break; - default: pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd); ret = -ENOTTY; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 214326383145..afac8d07c1bd 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1230,8 +1230,9 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, if (dc->io_disable) return -EIO; - - return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg); + if (!dc->bdev->bd_disk->fops->ioctl) + return -ENOTTY; + return dc->bdev->bd_disk->fops->ioctl(dc->bdev, mode, cmd, arg); } void bch_cached_dev_request_init(struct cached_dev *dc) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c18fc2548518..6db395c3d28b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -570,7 +570,10 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, } } - r = __blkdev_driver_ioctl(bdev, mode, cmd, arg); + if (!bdev->bd_disk->fops->ioctl) + r = -ENOTTY; + else + r = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); out: dm_unprepare_ioctl(md, srcu_idx); return r; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c1ba8a8d2bc..05b346a68c2e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1867,8 +1867,6 @@ extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t, #define blkdev_compat_ptr_ioctl NULL #endif -extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, - unsigned long); extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -- cgit v1.2.3 From a160c6159d4a0cf82f28bc1658a958e278ec3688 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Oct 2020 15:58:28 +0100 Subject: block: add an optional probe callback to major_names Add a callback to the major_names array that allows a driver to override how to probe for dev_t that doesn't currently have a gendisk registered. This will help separating the lookup of the gendisk by dev_t vs probe action for a not currently registered dev_t. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/genhd.c | 21 ++++++++++++++++++--- include/linux/genhd.h | 5 ++++- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 81017bd3b333..20521163fd06 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -402,6 +402,7 @@ static struct blk_major_name { struct blk_major_name *next; int major; char name[16]; + void (*probe)(dev_t devt); } *major_names[BLKDEV_MAJOR_HASH_SIZE]; static DEFINE_MUTEX(major_names_lock); @@ -444,7 +445,8 @@ void blkdev_show(struct seq_file *seqf, off_t offset) * See Documentation/admin-guide/devices.txt for the list of allocated * major numbers. */ -int register_blkdev(unsigned int major, const char *name) +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)) { struct blk_major_name **n, *p; int index, ret = 0; @@ -483,6 +485,7 @@ int register_blkdev(unsigned int major, const char *name) } p->major = major; + p->probe = probe; strlcpy(p->name, name, sizeof(p->name)); p->next = NULL; index = major_to_index(major); @@ -505,8 +508,7 @@ out: mutex_unlock(&major_names_lock); return ret; } - -EXPORT_SYMBOL(register_blkdev); +EXPORT_SYMBOL(__register_blkdev); void unregister_blkdev(unsigned int major, const char *name) { @@ -1033,6 +1035,19 @@ static ssize_t disk_badblocks_store(struct device *dev, static void request_gendisk_module(dev_t devt) { + unsigned int major = MAJOR(devt); + struct blk_major_name **n; + + mutex_lock(&major_names_lock); + for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) { + if ((*n)->major == major && (*n)->probe) { + (*n)->probe(devt); + mutex_unlock(&major_names_lock); + return; + } + } + mutex_unlock(&major_names_lock); + if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) /* Make old-style 2.4 aliases work */ request_module("block-major-%d", MAJOR(devt)); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 52eb592c874a..811d0f83c4cf 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -367,7 +367,10 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) -int register_blkdev(unsigned int major, const char *name); +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)); +#define register_blkdev(major, name) \ + __register_blkdev(major, name, NULL) void unregister_blkdev(unsigned int major, const char *name); void revalidate_disk_size(struct gendisk *disk, bool verbose); -- cgit v1.2.3 From d18e8b1bf9e2ee814a7f886a156bf762d52e178b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Oct 2020 15:58:29 +0100 Subject: ide: remove ide_{,un}register_region There is no need to ever register the fake gendisk used for ide-tape. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/ide/ide-probe.c | 32 -------------------------------- drivers/ide/ide-tape.c | 2 -- include/linux/ide.h | 3 --- 3 files changed, 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 1ddc45a04418..076d34b38172 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -929,38 +929,6 @@ static struct kobject *ata_probe(dev_t dev, int *part, void *data) return NULL; } -static struct kobject *exact_match(dev_t dev, int *part, void *data) -{ - struct gendisk *p = data; - *part &= (1 << PARTN_BITS) - 1; - return &disk_to_dev(p)->kobj; -} - -static int exact_lock(dev_t dev, void *data) -{ - struct gendisk *p = data; - - if (!get_disk_and_module(p)) - return -1; - return 0; -} - -void ide_register_region(struct gendisk *disk) -{ - blk_register_region(MKDEV(disk->major, disk->first_minor), - disk->minors, NULL, exact_match, exact_lock, disk); -} - -EXPORT_SYMBOL_GPL(ide_register_region); - -void ide_unregister_region(struct gendisk *disk) -{ - blk_unregister_region(MKDEV(disk->major, disk->first_minor), - disk->minors); -} - -EXPORT_SYMBOL_GPL(ide_unregister_region); - void ide_init_disk(struct gendisk *disk, ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 6f26634b22bb..88b96437b22e 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -1822,7 +1822,6 @@ static void ide_tape_remove(ide_drive_t *drive) ide_proc_unregister_driver(drive, tape->driver); device_del(&tape->dev); - ide_unregister_region(tape->disk); mutex_lock(&idetape_ref_mutex); put_device(&tape->dev); @@ -2026,7 +2025,6 @@ static int ide_tape_probe(ide_drive_t *drive) "n%s", tape->name); g->fops = &idetape_block_ops; - ide_register_region(g); return 0; diff --git a/include/linux/ide.h b/include/linux/ide.h index 62653769509f..2c300689a51a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1493,9 +1493,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; } static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif -void ide_register_region(struct gendisk *); -void ide_unregister_region(struct gendisk *); - void ide_check_nien_quirk_list(ide_drive_t *); void ide_undecoded_slave(ide_drive_t *); -- cgit v1.2.3 From e418de3abcda8b102f737919e830024d1455938f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Oct 2020 15:58:41 +0100 Subject: block: switch gendisk lookup to a simple xarray Now that bdev_map is only used for finding gendisks, we can use a simple xarray instead of the regions tracking structure for it. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jens Axboe --- block/genhd.c | 208 +++++++++----------------------------------------- include/linux/genhd.h | 7 -- 2 files changed, 37 insertions(+), 178 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 20521163fd06..01d146598fe7 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -27,15 +27,7 @@ static struct kobject *block_depr; -struct bdev_map { - struct bdev_map *next; - dev_t dev; - unsigned long range; - struct module *owner; - struct kobject *(*probe)(dev_t, int *, void *); - int (*lock)(dev_t, void *); - void *data; -} *bdev_map[255]; +static DEFINE_XARRAY(bdev_map); static DEFINE_MUTEX(bdev_map_lock); /* for extended dynamic devt allocation, currently only one major is used */ @@ -649,85 +641,26 @@ static char *bdevt_str(dev_t devt, char *buf) return buf; } -/* - * Register device numbers dev..(dev+range-1) - * range must be nonzero - * The hash chain is sorted on range, so that subranges can override. - */ -void blk_register_region(dev_t devt, unsigned long range, struct module *module, - struct kobject *(*probe)(dev_t, int *, void *), - int (*lock)(dev_t, void *), void *data) -{ - unsigned n = MAJOR(devt + range - 1) - MAJOR(devt) + 1; - unsigned index = MAJOR(devt); - unsigned i; - struct bdev_map *p; - - n = min(n, 255u); - p = kmalloc_array(n, sizeof(struct bdev_map), GFP_KERNEL); - if (p == NULL) - return; - - for (i = 0; i < n; i++, p++) { - p->owner = module; - p->probe = probe; - p->lock = lock; - p->dev = devt; - p->range = range; - p->data = data; - } +static void blk_register_region(struct gendisk *disk) +{ + int i; mutex_lock(&bdev_map_lock); - for (i = 0, p -= n; i < n; i++, p++, index++) { - struct bdev_map **s = &bdev_map[index % 255]; - while (*s && (*s)->range < range) - s = &(*s)->next; - p->next = *s; - *s = p; + for (i = 0; i < disk->minors; i++) { + if (xa_insert(&bdev_map, disk_devt(disk) + i, disk, GFP_KERNEL)) + WARN_ON_ONCE(1); } mutex_unlock(&bdev_map_lock); } -EXPORT_SYMBOL(blk_register_region); -void blk_unregister_region(dev_t devt, unsigned long range) +static void blk_unregister_region(struct gendisk *disk) { - unsigned n = MAJOR(devt + range - 1) - MAJOR(devt) + 1; - unsigned index = MAJOR(devt); - unsigned i; - struct bdev_map *found = NULL; + int i; mutex_lock(&bdev_map_lock); - for (i = 0; i < min(n, 255u); i++, index++) { - struct bdev_map **s; - for (s = &bdev_map[index % 255]; *s; s = &(*s)->next) { - struct bdev_map *p = *s; - if (p->dev == devt && p->range == range) { - *s = p->next; - if (!found) - found = p; - break; - } - } - } + for (i = 0; i < disk->minors; i++) + xa_erase(&bdev_map, disk_devt(disk) + i); mutex_unlock(&bdev_map_lock); - kfree(found); -} -EXPORT_SYMBOL(blk_unregister_region); - -static struct kobject *exact_match(dev_t devt, int *partno, void *data) -{ - struct gendisk *p = data; - - return &disk_to_dev(p)->kobj; -} - -static int exact_lock(dev_t devt, void *data) -{ - struct gendisk *p = data; - - if (!get_disk_and_module(p)) - return -1; - return 0; } static void disk_scan_partitions(struct gendisk *disk) @@ -873,8 +806,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt)); WARN_ON(ret); bdi_set_owner(bdi, dev); - blk_register_region(disk_devt(disk), disk->minors, NULL, - exact_match, exact_lock, disk); + blk_register_region(disk); } register_disk(parent, disk, groups); if (register_queue) @@ -987,7 +919,7 @@ void del_gendisk(struct gendisk *disk) blk_unregister_queue(disk); if (!(disk->flags & GENHD_FL_HIDDEN)) - blk_unregister_region(disk_devt(disk), disk->minors); + blk_unregister_region(disk); /* * Remove gendisk pointer from idr so that it cannot be looked up * while RCU period before freeing gendisk is running to prevent @@ -1053,54 +985,22 @@ static void request_gendisk_module(dev_t devt) request_module("block-major-%d", MAJOR(devt)); } -static struct gendisk *lookup_gendisk(dev_t dev, int *partno) +static bool get_disk_and_module(struct gendisk *disk) { - struct kobject *kobj; - struct bdev_map *p; - unsigned long best = ~0UL; - -retry: - mutex_lock(&bdev_map_lock); - for (p = bdev_map[MAJOR(dev) % 255]; p; p = p->next) { - struct kobject *(*probe)(dev_t, int *, void *); - struct module *owner; - void *data; - - if (p->dev > dev || p->dev + p->range - 1 < dev) - continue; - if (p->range - 1 >= best) - break; - if (!try_module_get(p->owner)) - continue; - owner = p->owner; - data = p->data; - probe = p->probe; - best = p->range - 1; - *partno = dev - p->dev; - - if (!probe) { - mutex_unlock(&bdev_map_lock); - module_put(owner); - request_gendisk_module(dev); - goto retry; - } + struct module *owner; - if (p->lock && p->lock(dev, data) < 0) { - module_put(owner); - continue; - } - mutex_unlock(&bdev_map_lock); - kobj = probe(dev, partno, data); - /* Currently ->owner protects _only_ ->probe() itself. */ + if (!disk->fops) + return false; + owner = disk->fops->owner; + if (owner && !try_module_get(owner)) + return false; + if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj)) { module_put(owner); - if (kobj) - return dev_to_disk(kobj_to_dev(kobj)); - goto retry; + return false; } - mutex_unlock(&bdev_map_lock); - return NULL; -} + return true; +} /** * get_gendisk - get partitioning information for a given device @@ -1119,7 +1019,19 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) might_sleep(); if (MAJOR(devt) != BLOCK_EXT_MAJOR) { - disk = lookup_gendisk(devt, partno); + mutex_lock(&bdev_map_lock); + disk = xa_load(&bdev_map, devt); + if (!disk) { + mutex_unlock(&bdev_map_lock); + request_gendisk_module(devt); + mutex_lock(&bdev_map_lock); + disk = xa_load(&bdev_map, devt); + } + if (disk && !get_disk_and_module(disk)) + disk = NULL; + if (disk) + *partno = devt - disk_devt(disk); + mutex_unlock(&bdev_map_lock); } else { struct hd_struct *part; @@ -1323,21 +1235,6 @@ static const struct seq_operations partitions_op = { }; #endif -static void bdev_map_init(void) -{ - struct bdev_map *base; - int i; - - base = kzalloc(sizeof(*base), GFP_KERNEL); - if (!base) - panic("cannot allocate bdev_map"); - - base->dev = 1; - base->range = ~0 ; - for (i = 0; i < 255; i++) - bdev_map[i] = base; -} - static int __init genhd_device_init(void) { int error; @@ -1346,7 +1243,6 @@ static int __init genhd_device_init(void) error = class_register(&block_class); if (unlikely(error)) return error; - bdev_map_init(); blk_dev_init(); register_blkdev(BLOCK_EXT_MAJOR, "blkext"); @@ -1895,35 +1791,6 @@ out_free_disk: } EXPORT_SYMBOL(__alloc_disk_node); -/** - * get_disk_and_module - increments the gendisk and gendisk fops module refcount - * @disk: the struct gendisk to increment the refcount for - * - * This increments the refcount for the struct gendisk, and the gendisk's - * fops module owner. - * - * Context: Any context. - */ -struct kobject *get_disk_and_module(struct gendisk *disk) -{ - struct module *owner; - struct kobject *kobj; - - if (!disk->fops) - return NULL; - owner = disk->fops->owner; - if (owner && !try_module_get(owner)) - return NULL; - kobj = kobject_get_unless_zero(&disk_to_dev(disk)->kobj); - if (kobj == NULL) { - module_put(owner); - return NULL; - } - return kobj; - -} -EXPORT_SYMBOL(get_disk_and_module); - /** * put_disk - decrements the gendisk refcount * @disk: the struct gendisk to decrement the refcount for @@ -1960,7 +1827,6 @@ void put_disk_and_module(struct gendisk *disk) module_put(owner); } } -EXPORT_SYMBOL(put_disk_and_module); static void set_disk_ro_uevent(struct gendisk *gd, int ro) { diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 811d0f83c4cf..3cbc2781ef34 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -339,15 +339,8 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); int blk_drop_partitions(struct block_device *bdev); extern struct gendisk *__alloc_disk_node(int minors, int node_id); -extern struct kobject *get_disk_and_module(struct gendisk *disk); extern void put_disk(struct gendisk *disk); extern void put_disk_and_module(struct gendisk *disk); -extern void blk_register_region(dev_t devt, unsigned long range, - struct module *module, - struct kobject *(*probe)(dev_t, int *, void *), - int (*lock)(dev_t, void *), - void *data); -extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk_node(minors, node_id) \ ({ \ -- cgit v1.2.3 From 449f4ec9892ebc2f37a7eae6d97db2cf7c65e09a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Nov 2020 15:56:56 +0100 Subject: block: remove the update_bdev parameter to set_capacity_revalidate_and_notify The update_bdev argument is always set to true, so remove it. Also rename the function to the slighly less verbose set_capacity_and_notify, as propagating the disk size to the block device isn't really revalidation. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Petr Vorel Signed-off-by: Jens Axboe --- block/genhd.c | 13 +++++-------- drivers/block/loop.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/scsi/sd.c | 5 ++--- include/linux/genhd.h | 3 +-- 7 files changed, 12 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index ec2a24799cd9..4e039524f92b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -47,17 +47,15 @@ static void disk_del_events(struct gendisk *disk); static void disk_release_events(struct gendisk *disk); /* - * Set disk capacity and notify if the size is not currently - * zero and will not be set to zero + * Set disk capacity and notify if the size is not currently zero and will not + * be set to zero. Returns true if a uevent was sent, otherwise false. */ -bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, - bool update_bdev) +bool set_capacity_and_notify(struct gendisk *disk, sector_t size) { sector_t capacity = get_capacity(disk); set_capacity(disk, size); - if (update_bdev) - revalidate_disk_size(disk, true); + revalidate_disk_size(disk, true); if (capacity != size && capacity != 0 && size != 0) { char *envp[] = { "RESIZE=1", NULL }; @@ -68,8 +66,7 @@ bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, return false; } - -EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify); +EXPORT_SYMBOL_GPL(set_capacity_and_notify); /* * Format the device name of the indicated disk into the supplied buffer and diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fcc5e32f0993..9a27d4f1c08a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -251,7 +251,7 @@ loop_validate_block_size(unsigned short bsize) */ static void loop_set_size(struct loop_device *lo, loff_t size) { - if (!set_capacity_revalidate_and_notify(lo->lo_disk, size, true)) + if (!set_capacity_and_notify(lo->lo_disk, size)) kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a314b9382442..3e812b4c32e6 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -470,7 +470,7 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) cap_str_10, cap_str_2); - set_capacity_revalidate_and_notify(vblk->disk, capacity, true); + set_capacity_and_notify(vblk->disk, capacity); } static void virtblk_config_changed_work(struct work_struct *work) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 48629d3433b4..79521e33d30e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2370,7 +2370,7 @@ static void blkfront_connect(struct blkfront_info *info) return; printk(KERN_INFO "Setting capacity to %Lu\n", sectors); - set_capacity_revalidate_and_notify(info->gd, sectors, true); + set_capacity_and_notify(info->gd, sectors); return; case BLKIF_STATE_SUSPENDED: diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f6c6479da0e9..6c144e748f8c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2053,7 +2053,7 @@ static void nvme_update_disk_info(struct gendisk *disk, capacity = 0; } - set_capacity_revalidate_and_notify(disk, capacity, true); + set_capacity_and_notify(disk, capacity); nvme_config_discard(disk, ns); nvme_config_write_zeroes(disk, ns); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e9b898a1a480..679c2c025047 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3261,8 +3261,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sdkp->first_scan = 0; - set_capacity_revalidate_and_notify(disk, - logical_to_sectors(sdp, sdkp->capacity), true); + set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity)); sd_config_write_same(sdkp); kfree(buffer); @@ -3272,7 +3271,7 @@ static int sd_revalidate_disk(struct gendisk *disk) * capacity to 0. */ if (sd_zbc_revalidate_zones(sdkp)) - set_capacity_revalidate_and_notify(disk, 0, true); + set_capacity_and_notify(disk, 0); out: return 0; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3cbc2781ef34..46553d6d6025 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -314,8 +314,7 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, - bool update_bdev); +bool set_capacity_and_notify(struct gendisk *disk, sector_t size); /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; -- cgit v1.2.3 From 63653368c25ff0b1b1aaf045c97ea87bd8c16123 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Wed, 25 Nov 2020 14:58:17 +0800 Subject: block: remove unused BIO_SPLIT_ENTRIES Since commit 4b1faf931650 ("block: Kill bio_pair_split()"), there's no user of BIO_SPLIT_ENTRIES anymore. Signed-off-by: Jeffle Xu Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index c6d765382926..ecf67108f091 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -711,12 +711,6 @@ static inline bool bioset_initialized(struct bio_set *bs) return bs->bio_slab != NULL; } -/* - * a small number of entries is fine, not going to be performance critical. - * basically we just need to survive - */ -#define BIO_SPLIT_ENTRIES 2 - #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_for_each_vec(bvl, bip, iter) \ -- cgit v1.2.3 From 60b498852bf219c0bf2b0864c69972840978ca43 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Nov 2020 15:21:18 +0100 Subject: fs: remove get_super_thawed and get_super_exclusive_thawed Just open code the wait in the only caller of both functions. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- fs/internal.h | 2 ++ fs/quota/quota.c | 31 ++++++++++++++++++++++++------- fs/super.c | 51 ++------------------------------------------------- include/linux/fs.h | 4 +--- 4 files changed, 29 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/fs/internal.h b/fs/internal.h index a7cd0f64faa4..47be21dfeebe 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -114,7 +114,9 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); */ extern int reconfigure_super(struct fs_context *); extern bool trylock_super(struct super_block *sb); +struct super_block *__get_super(struct block_device *bdev, bool excl); extern struct super_block *user_get_super(dev_t); +void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); /* diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 9af95c7a0bbe..f3d32b0d9008 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -20,6 +20,7 @@ #include #include #include "compat.h" +#include "../internal.h" static int check_quotactl_permission(struct super_block *sb, int type, int cmd, qid_t id) @@ -868,6 +869,7 @@ static struct super_block *quotactl_block(const char __user *special, int cmd) struct block_device *bdev; struct super_block *sb; struct filename *tmp = getname(special); + bool excl = false, thawed = false; if (IS_ERR(tmp)) return ERR_CAST(tmp); @@ -875,17 +877,32 @@ static struct super_block *quotactl_block(const char __user *special, int cmd) putname(tmp); if (IS_ERR(bdev)) return ERR_CAST(bdev); - if (quotactl_cmd_onoff(cmd)) - sb = get_super_exclusive_thawed(bdev); - else if (quotactl_cmd_write(cmd)) - sb = get_super_thawed(bdev); - else - sb = get_super(bdev); + + if (quotactl_cmd_onoff(cmd)) { + excl = true; + thawed = true; + } else if (quotactl_cmd_write(cmd)) { + thawed = true; + } + +retry: + sb = __get_super(bdev, excl); + if (thawed && sb && sb->s_writers.frozen != SB_UNFROZEN) { + if (excl) + up_write(&sb->s_umount); + else + up_read(&sb->s_umount); + wait_event(sb->s_writers.wait_unfrozen, + sb->s_writers.frozen == SB_UNFROZEN); + put_super(sb); + goto retry; + } + bdput(bdev); if (!sb) return ERR_PTR(-ENODEV); - return sb; + #else return ERR_PTR(-ENODEV); #endif diff --git a/fs/super.c b/fs/super.c index 98bb0629ee10..343e5c1e538d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -307,7 +307,7 @@ static void __put_super(struct super_block *s) * Drops a temporary reference, frees superblock if there's no * references left. */ -static void put_super(struct super_block *sb) +void put_super(struct super_block *sb) { spin_lock(&sb_lock); __put_super(sb); @@ -740,7 +740,7 @@ void iterate_supers_type(struct file_system_type *type, EXPORT_SYMBOL(iterate_supers_type); -static struct super_block *__get_super(struct block_device *bdev, bool excl) +struct super_block *__get_super(struct block_device *bdev, bool excl) { struct super_block *sb; @@ -789,53 +789,6 @@ struct super_block *get_super(struct block_device *bdev) } EXPORT_SYMBOL(get_super); -static struct super_block *__get_super_thawed(struct block_device *bdev, - bool excl) -{ - while (1) { - struct super_block *s = __get_super(bdev, excl); - if (!s || s->s_writers.frozen == SB_UNFROZEN) - return s; - if (!excl) - up_read(&s->s_umount); - else - up_write(&s->s_umount); - wait_event(s->s_writers.wait_unfrozen, - s->s_writers.frozen == SB_UNFROZEN); - put_super(s); - } -} - -/** - * get_super_thawed - get thawed superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device. The superblock is returned once it is thawed - * (or immediately if it was not frozen). %NULL is returned if no match - * is found. - */ -struct super_block *get_super_thawed(struct block_device *bdev) -{ - return __get_super_thawed(bdev, false); -} -EXPORT_SYMBOL(get_super_thawed); - -/** - * get_super_exclusive_thawed - get thawed superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device. The superblock is returned once it is thawed - * (or immediately if it was not frozen) and s_umount semaphore is held - * in exclusive mode. %NULL is returned if no match is found. - */ -struct super_block *get_super_exclusive_thawed(struct block_device *bdev) -{ - return __get_super_thawed(bdev, true); -} -EXPORT_SYMBOL(get_super_exclusive_thawed); - /** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for diff --git a/include/linux/fs.h b/include/linux/fs.h index 8667d0cdc71e..a61df0dd4f19 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1409,7 +1409,7 @@ enum { struct sb_writers { int frozen; /* Is sb frozen? */ - wait_queue_head_t wait_unfrozen; /* for get_super_thawed() */ + wait_queue_head_t wait_unfrozen; /* wait for thaw */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -3132,8 +3132,6 @@ extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); -extern struct super_block *get_super_thawed(struct block_device *); -extern struct super_block *get_super_exclusive_thawed(struct block_device *bdev); extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); -- cgit v1.2.3 From 040f04bd2e825f1d80b14a0e0ac3d830339eb779 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 11:54:06 +0100 Subject: fs: simplify freeze_bdev/thaw_bdev Store the frozen superblock in struct block_device to avoid the awkward interface that can return a sb only used a cookie, an ERR_PTR or NULL. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Acked-by: Chao Yu [f2fs] Signed-off-by: Jens Axboe --- drivers/md/dm-core.h | 5 ----- drivers/md/dm.c | 20 ++++++-------------- fs/block_dev.c | 37 +++++++++++++++---------------------- fs/buffer.c | 2 +- fs/ext4/ioctl.c | 2 +- fs/f2fs/file.c | 14 +++++--------- fs/xfs/xfs_fsops.c | 7 ++----- include/linux/blk_types.h | 1 + include/linux/blkdev.h | 4 ++-- 9 files changed, 33 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index d522093cb39d..aace147effca 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -96,11 +96,6 @@ struct mapped_device { */ struct workqueue_struct *wq; - /* - * freeze/thaw support require holding onto a super block - */ - struct super_block *frozen_sb; - /* forced geometry settings */ struct hd_geometry geometry; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 54739f1b579b..50541d336c71 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2392,27 +2392,19 @@ static int lock_fs(struct mapped_device *md) { int r; - WARN_ON(md->frozen_sb); + WARN_ON(test_bit(DMF_FROZEN, &md->flags)); - md->frozen_sb = freeze_bdev(md->bdev); - if (IS_ERR(md->frozen_sb)) { - r = PTR_ERR(md->frozen_sb); - md->frozen_sb = NULL; - return r; - } - - set_bit(DMF_FROZEN, &md->flags); - - return 0; + r = freeze_bdev(md->bdev); + if (!r) + set_bit(DMF_FROZEN, &md->flags); + return r; } static void unlock_fs(struct mapped_device *md) { if (!test_bit(DMF_FROZEN, &md->flags)) return; - - thaw_bdev(md->bdev, md->frozen_sb); - md->frozen_sb = NULL; + thaw_bdev(md->bdev); clear_bit(DMF_FROZEN, &md->flags); } diff --git a/fs/block_dev.c b/fs/block_dev.c index d8664f5c1ff6..33c29106c989 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -548,55 +548,47 @@ EXPORT_SYMBOL(fsync_bdev); * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze * actually. */ -struct super_block *freeze_bdev(struct block_device *bdev) +int freeze_bdev(struct block_device *bdev) { struct super_block *sb; int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (++bdev->bd_fsfreeze_count > 1) { - /* - * We don't even need to grab a reference - the first call - * to freeze_bdev grab an active reference and only the last - * thaw_bdev drops it. - */ - sb = get_super(bdev); - if (sb) - drop_super(sb); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return sb; - } + if (++bdev->bd_fsfreeze_count > 1) + goto done; sb = get_active_super(bdev); if (!sb) - goto out; + goto sync; if (sb->s_op->freeze_super) error = sb->s_op->freeze_super(sb); else error = freeze_super(sb); + deactivate_super(sb); + if (error) { - deactivate_super(sb); bdev->bd_fsfreeze_count--; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return ERR_PTR(error); + goto done; } - deactivate_super(sb); - out: + bdev->bd_fsfreeze_sb = sb; + +sync: sync_blockdev(bdev); +done: mutex_unlock(&bdev->bd_fsfreeze_mutex); - return sb; /* thaw_bdev releases s->s_umount */ + return error; } EXPORT_SYMBOL(freeze_bdev); /** * thaw_bdev -- unlock filesystem * @bdev: blockdevice to unlock - * @sb: associated superblock * * Unlocks the filesystem and marks it writeable again after freeze_bdev(). */ -int thaw_bdev(struct block_device *bdev, struct super_block *sb) +int thaw_bdev(struct block_device *bdev) { + struct super_block *sb; int error = -EINVAL; mutex_lock(&bdev->bd_fsfreeze_mutex); @@ -607,6 +599,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) if (--bdev->bd_fsfreeze_count > 0) goto out; + sb = bdev->bd_fsfreeze_sb; if (!sb) goto out; diff --git a/fs/buffer.c b/fs/buffer.c index 23f645657488..a7595ada9400 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -523,7 +523,7 @@ repeat: void emergency_thaw_bdev(struct super_block *sb) { - while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) + while (sb->s_bdev && !thaw_bdev(sb->s_bdev)) printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f0381876a7e5..524e13432447 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -624,7 +624,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg) case EXT4_GOING_FLAGS_DEFAULT: freeze_bdev(sb->s_bdev); set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); - thaw_bdev(sb->s_bdev, sb); + thaw_bdev(sb->s_bdev); break; case EXT4_GOING_FLAGS_LOGFLUSH: set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ee861c6d9ff0..a9fc482a0e60 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2230,16 +2230,12 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) switch (in) { case F2FS_GOING_DOWN_FULLSYNC: - sb = freeze_bdev(sb->s_bdev); - if (IS_ERR(sb)) { - ret = PTR_ERR(sb); + ret = freeze_bdev(sb->s_bdev); + if (ret) goto out; - } - if (sb) { - f2fs_stop_checkpoint(sbi, false); - set_sbi_flag(sbi, SBI_IS_SHUTDOWN); - thaw_bdev(sb->s_bdev, sb); - } + f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); + thaw_bdev(sb->s_bdev); break; case F2FS_GOING_DOWN_METASYNC: /* do checkpoint only */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index ef1d5bb88b93..b7c5783a031c 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -433,13 +433,10 @@ xfs_fs_goingdown( { switch (inflags) { case XFS_FSOP_GOING_FLAGS_DEFAULT: { - struct super_block *sb = freeze_bdev(mp->m_super->s_bdev); - - if (sb && !IS_ERR(sb)) { + if (!freeze_bdev(mp->m_super->s_bdev)) { xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); - thaw_bdev(sb->s_bdev, sb); + thaw_bdev(mp->m_super->s_bdev); } - break; } case XFS_FSOP_GOING_FLAGS_LOGFLUSH: diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d9b69bbde5cc..ebfb4e7c1fd1 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -46,6 +46,7 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; + struct super_block *bd_fsfreeze_sb; } __randomize_layout; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 05b346a68c2e..12810a19edeb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2020,7 +2020,7 @@ static inline int sync_blockdev(struct block_device *bdev) #endif int fsync_bdev(struct block_device *bdev); -struct super_block *freeze_bdev(struct block_device *bdev); -int thaw_bdev(struct block_device *bdev, struct super_block *sb); +int freeze_bdev(struct block_device *bdev); +int thaw_bdev(struct block_device *bdev); #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From b601d148a16ea16dfbaf3600be35ee175847a09b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Nov 2020 16:21:52 +0100 Subject: block: remove a duplicate __disk_get_part prototype Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Reviewed-by: Jan Kara Reviewed-by: Greg Kroah-Hartman Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 46553d6d6025..22f5b9fd96f8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -250,7 +250,6 @@ static inline dev_t part_devt(struct hd_struct *part) return part_to_dev(part)->devt; } -extern struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); static inline void disk_put_part(struct hd_struct *part) -- cgit v1.2.3 From 8d65269fe8065fee889bca5b204d711b0695a8f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 17 Nov 2020 08:18:55 +0100 Subject: block: add a bdev_kobj helper Add a little helper to find the kobject for a struct block_device. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Acked-by: Tejun Heo Acked-by: Coly Li [bcache] Acked-by: David Sterba [btrfs] Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 7 ++----- drivers/md/md.c | 4 +--- fs/block_dev.c | 6 +++--- fs/btrfs/sysfs.c | 15 +++------------ include/linux/blk_types.h | 3 +++ 5 files changed, 12 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 46a00134a36a..a6a5e21e4fd1 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1447,8 +1447,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto err; err = "error creating kobject"; - if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj, - "bcache")) + if (kobject_add(&dc->disk.kobj, bdev_kobj(bdev), "bcache")) goto err; if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) goto err; @@ -2342,9 +2341,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto err; } - if (kobject_add(&ca->kobj, - &part_to_dev(bdev->bd_part)->kobj, - "bcache")) { + if (kobject_add(&ca->kobj, bdev_kobj(bdev), "bcache")) { err = "error calling kobject_add"; ret = -ENOMEM; goto out; diff --git a/drivers/md/md.c b/drivers/md/md.c index b2edf5e0f965..7ce6047c856e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2414,7 +2414,6 @@ EXPORT_SYMBOL(md_integrity_add_rdev); static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) { char b[BDEVNAME_SIZE]; - struct kobject *ko; int err; /* prevent duplicates */ @@ -2477,9 +2476,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; - ko = &part_to_dev(rdev->bdev->bd_part)->kobj; /* failure here is OK */ - err = sysfs_create_link(&rdev->kobj, ko, "block"); + err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block"); rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); rdev->sysfs_unack_badblocks = sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks"); diff --git a/fs/block_dev.c b/fs/block_dev.c index 33c29106c989..c5755150c6be 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1242,7 +1242,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) holder->disk = disk; holder->refcnt = 1; - ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + ret = add_symlink(disk->slave_dir, bdev_kobj(bdev)); if (ret) goto out_free; @@ -1259,7 +1259,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) goto out_unlock; out_del: - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + del_symlink(disk->slave_dir, bdev_kobj(bdev)); out_free: kfree(holder); out_unlock: @@ -1287,7 +1287,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) holder = bd_find_holder_disk(bdev, disk); if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + del_symlink(disk->slave_dir, bdev_kobj(bdev)); del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); kobject_put(bdev->bd_part->holder_dir); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 279d9262b676..24b6c6dc6900 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1232,8 +1232,6 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info, void btrfs_sysfs_remove_device(struct btrfs_device *device) { - struct hd_struct *disk; - struct kobject *disk_kobj; struct kobject *devices_kobj; /* @@ -1243,11 +1241,8 @@ void btrfs_sysfs_remove_device(struct btrfs_device *device) devices_kobj = device->fs_info->fs_devices->devices_kobj; ASSERT(devices_kobj); - if (device->bdev) { - disk = device->bdev->bd_part; - disk_kobj = &part_to_dev(disk)->kobj; - sysfs_remove_link(devices_kobj, disk_kobj->name); - } + if (device->bdev) + sysfs_remove_link(devices_kobj, bdev_kobj(device->bdev)->name); if (device->devid_kobj.state_initialized) { kobject_del(&device->devid_kobj); @@ -1353,11 +1348,7 @@ int btrfs_sysfs_add_device(struct btrfs_device *device) nofs_flag = memalloc_nofs_save(); if (device->bdev) { - struct hd_struct *disk; - struct kobject *disk_kobj; - - disk = device->bdev->bd_part; - disk_kobj = &part_to_dev(disk)->kobj; + struct kobject *disk_kobj = bdev_kobj(device->bdev); ret = sysfs_create_link(devices_kobj, disk_kobj, disk_kobj->name); if (ret) { diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ebfb4e7c1fd1..9698f459cc65 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -49,6 +49,9 @@ struct block_device { struct super_block *bd_fsfreeze_sb; } __randomize_layout; +#define bdev_kobj(_bdev) \ + (&part_to_dev((_bdev)->bd_part)->kobj) + /* * Block error status values. See block/blk-core:blk_errors for the details. * Alpha cannot write a byte atomically, so we need to use 32-bit value. -- cgit v1.2.3 From c2637e80a09e0d6c698d2771d7230f59c2138122 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 14 Nov 2020 19:19:57 +0100 Subject: init: refactor name_to_dev_t Split each case into a self-contained helper, and move the block dependent code entirely under the pre-existing #ifdef CONFIG_BLOCK. This allows to remove the blk_lookup_devt stub in genhd.h. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 7 +- init/do_mounts.c | 183 +++++++++++++++++++++++++------------------------- 2 files changed, 91 insertions(+), 99 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 22f5b9fd96f8..ca5e356084c3 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -388,18 +388,13 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ +dev_t blk_lookup_devt(const char *name, int partno); #ifdef CONFIG_BLOCK void printk_all_partitions(void); -dev_t blk_lookup_devt(const char *name, int partno); #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -static inline dev_t blk_lookup_devt(const char *name, int partno) -{ - dev_t devt = MKDEV(0, 0); - return devt; -} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_GENHD_H */ diff --git a/init/do_mounts.c b/init/do_mounts.c index b5f9604d0c98..aef2f24461c7 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -90,7 +90,6 @@ no_match: return 0; } - /** * devt_from_partuuid - looks up the dev_t of a partition by its UUID * @uuid_str: char array containing ascii UUID @@ -186,7 +185,83 @@ static int match_dev_by_label(struct device *dev, const void *data) return 0; } -#endif + +static dev_t devt_from_partlabel(const char *label) +{ + struct device *dev; + dev_t devt = 0; + + dev = class_find_device(&block_class, NULL, label, &match_dev_by_label); + if (dev) { + devt = dev->devt; + put_device(dev); + } + + return devt; +} + +static dev_t devt_from_devname(const char *name) +{ + dev_t devt = 0; + int part; + char s[32]; + char *p; + + if (strlen(name) > 31) + return 0; + strcpy(s, name); + for (p = s; *p; p++) { + if (*p == '/') + *p = '!'; + } + + devt = blk_lookup_devt(s, 0); + if (devt) + return devt; + + /* + * Try non-existent, but valid partition, which may only exist after + * opening the device, like partitioned md devices. + */ + while (p > s && isdigit(p[-1])) + p--; + if (p == s || !*p || *p == '0') + return 0; + + /* try disk name without */ + part = simple_strtoul(p, NULL, 10); + *p = '\0'; + devt = blk_lookup_devt(s, part); + if (devt) + return devt; + + /* try disk name without p */ + if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') + return 0; + p[-1] = '\0'; + return blk_lookup_devt(s, part); +} +#endif /* CONFIG_BLOCK */ + +static dev_t devt_from_devnum(const char *name) +{ + unsigned maj, min, offset; + dev_t devt = 0; + char *p, dummy; + + if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || + sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) { + devt = MKDEV(maj, min); + if (maj != MAJOR(devt) || min != MINOR(devt)) + return 0; + } else { + devt = new_decode_dev(simple_strtoul(name, &p, 16)); + if (*p) + return 0; + } + + return devt; +} /* * Convert a name into device number. We accept the following variants: @@ -218,101 +293,23 @@ static int match_dev_by_label(struct device *dev, const void *data) * name contains slashes, the device name has them replaced with * bangs. */ - dev_t name_to_dev_t(const char *name) { - char s[32]; - char *p; - dev_t res = 0; - int part; - + if (strcmp(name, "/dev/nfs") == 0) + return Root_NFS; + if (strcmp(name, "/dev/cifs") == 0) + return Root_CIFS; + if (strcmp(name, "/dev/ram") == 0) + return Root_RAM0; #ifdef CONFIG_BLOCK - if (strncmp(name, "PARTUUID=", 9) == 0) { - name += 9; - res = devt_from_partuuid(name); - if (!res) - goto fail; - goto done; - } else if (strncmp(name, "PARTLABEL=", 10) == 0) { - struct device *dev; - - dev = class_find_device(&block_class, NULL, name + 10, - &match_dev_by_label); - if (!dev) - goto fail; - - res = dev->devt; - put_device(dev); - goto done; - } + if (strncmp(name, "PARTUUID=", 9) == 0) + return devt_from_partuuid(name + 9); + if (strncmp(name, "PARTLABEL=", 10) == 0) + return devt_from_partlabel(name + 10); + if (strncmp(name, "/dev/", 5) == 0) + return devt_from_devname(name + 5); #endif - - if (strncmp(name, "/dev/", 5) != 0) { - unsigned maj, min, offset; - char dummy; - - if ((sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2) || - (sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3)) { - res = MKDEV(maj, min); - if (maj != MAJOR(res) || min != MINOR(res)) - goto fail; - } else { - res = new_decode_dev(simple_strtoul(name, &p, 16)); - if (*p) - goto fail; - } - goto done; - } - - name += 5; - res = Root_NFS; - if (strcmp(name, "nfs") == 0) - goto done; - res = Root_CIFS; - if (strcmp(name, "cifs") == 0) - goto done; - res = Root_RAM0; - if (strcmp(name, "ram") == 0) - goto done; - - if (strlen(name) > 31) - goto fail; - strcpy(s, name); - for (p = s; *p; p++) - if (*p == '/') - *p = '!'; - res = blk_lookup_devt(s, 0); - if (res) - goto done; - - /* - * try non-existent, but valid partition, which may only exist - * after revalidating the disk, like partitioned md devices - */ - while (p > s && isdigit(p[-1])) - p--; - if (p == s || !*p || *p == '0') - goto fail; - - /* try disk name without */ - part = simple_strtoul(p, NULL, 10); - *p = '\0'; - res = blk_lookup_devt(s, part); - if (res) - goto done; - - /* try disk name without p */ - if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') - goto fail; - p[-1] = '\0'; - res = blk_lookup_devt(s, part); - if (res) - goto done; - -fail: - return 0; -done: - return res; + return devt_from_devnum(name); } EXPORT_SYMBOL_GPL(name_to_dev_t); -- cgit v1.2.3 From 4e7b5671c6a883d94b5428e1a9c141bbd56cb2a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 13:38:40 +0100 Subject: block: remove i_bdev Switch the block device lookup interfaces to directly work with a dev_t so that struct block_device references are only acquired by the blkdev_get variants (and the blk-cgroup special case). This means that we now don't need an extra reference in the inode and can generally simplify handling of struct block_device to keep the lookups contained in the core block layer code. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Acked-by: Coly Li [bcache] Signed-off-by: Jens Axboe --- block/ioctl.c | 3 +- drivers/block/loop.c | 8 +- drivers/md/bcache/super.c | 20 +-- drivers/md/dm-table.c | 9 +- drivers/mtd/mtdsuper.c | 17 +-- drivers/target/target_core_file.c | 6 +- drivers/usb/gadget/function/storage_common.c | 8 +- fs/block_dev.c | 196 +++++++-------------------- fs/btrfs/volumes.c | 13 +- fs/inode.c | 3 - fs/internal.h | 7 +- fs/io_uring.c | 10 +- fs/pipe.c | 5 +- fs/quota/quota.c | 19 ++- fs/statfs.c | 2 +- fs/super.c | 44 +++--- include/linux/blkdev.h | 2 +- include/linux/fs.h | 1 - 18 files changed, 121 insertions(+), 252 deletions(-) (limited to 'include/linux') diff --git a/block/ioctl.c b/block/ioctl.c index 0c09bb7a6ff3..a6d8171221c7 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -590,8 +590,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) { int ret; void __user *argp = compat_ptr(arg); - struct inode *inode = file->f_mapping->host; - struct block_device *bdev = inode->i_bdev; + struct block_device *bdev = I_BDEV(file->f_mapping->host); struct gendisk *disk = bdev->bd_disk; fmode_t mode = file->f_mode; loff_t size; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b42c728620c9..26c7aafba7c5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -675,10 +675,10 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; - if (f->f_mapping->host->i_bdev == bdev) + if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; - l = f->f_mapping->host->i_bdev->bd_disk->private_data; + l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; if (l->lo_state != Lo_bound) { return -EINVAL; } @@ -885,9 +885,7 @@ static void loop_config_discard(struct loop_device *lo) * file-backed loop devices: discarded regions read back as zero. */ if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) { - struct request_queue *backingq; - - backingq = bdev_get_queue(inode->i_bdev); + struct request_queue *backingq = bdev_get_queue(I_BDEV(inode)); max_discard_sectors = backingq->limits.max_write_zeroes_sectors; granularity = backingq->limits.discard_granularity ?: diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a6a5e21e4fd1..c55d3c58a7ef 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2380,38 +2380,38 @@ kobj_attribute_write(register, register_bcache); kobj_attribute_write(register_quiet, register_bcache); kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); -static bool bch_is_open_backing(struct block_device *bdev) +static bool bch_is_open_backing(dev_t dev) { struct cache_set *c, *tc; struct cached_dev *dc, *t; list_for_each_entry_safe(c, tc, &bch_cache_sets, list) list_for_each_entry_safe(dc, t, &c->cached_devs, list) - if (dc->bdev == bdev) + if (dc->bdev->bd_dev == dev) return true; list_for_each_entry_safe(dc, t, &uncached_devices, list) - if (dc->bdev == bdev) + if (dc->bdev->bd_dev == dev) return true; return false; } -static bool bch_is_open_cache(struct block_device *bdev) +static bool bch_is_open_cache(dev_t dev) { struct cache_set *c, *tc; list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { struct cache *ca = c->cache; - if (ca->bdev == bdev) + if (ca->bdev->bd_dev == dev) return true; } return false; } -static bool bch_is_open(struct block_device *bdev) +static bool bch_is_open(dev_t dev) { - return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); + return bch_is_open_cache(dev) || bch_is_open_backing(dev); } struct async_reg_args { @@ -2535,9 +2535,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, sb); if (IS_ERR(bdev)) { if (bdev == ERR_PTR(-EBUSY)) { - bdev = lookup_bdev(strim(path)); + dev_t dev; + mutex_lock(&bch_register_lock); - if (!IS_ERR(bdev) && bch_is_open(bdev)) + if (lookup_bdev(strim(path), &dev) == 0 && + bch_is_open(dev)) err = "device already registered"; else err = "device busy"; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ce543b761be7..dea677721710 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -348,16 +348,9 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, dev_t dm_get_dev_t(const char *path) { dev_t dev; - struct block_device *bdev; - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) + if (lookup_bdev(path, &dev)) dev = name_to_dev_t(path); - else { - dev = bdev->bd_dev; - bdput(bdev); - } - return dev; } EXPORT_SYMBOL_GPL(dm_get_dev_t); diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index c3e2098372f2..38b6aa849c63 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -120,8 +120,8 @@ int get_tree_mtd(struct fs_context *fc, struct fs_context *fc)) { #ifdef CONFIG_BLOCK - struct block_device *bdev; - int ret, major; + dev_t dev; + int ret; #endif int mtdnr; @@ -169,20 +169,15 @@ int get_tree_mtd(struct fs_context *fc, /* try the old way - the hack where we allowed users to mount * /dev/mtdblock$(n) but didn't actually _use_ the blockdev */ - bdev = lookup_bdev(fc->source); - if (IS_ERR(bdev)) { - ret = PTR_ERR(bdev); + ret = lookup_bdev(fc->source, &dev); + if (ret) { errorf(fc, "MTD: Couldn't look up '%s': %d", fc->source, ret); return ret; } pr_debug("MTDSB: lookup_bdev() returned 0\n"); - major = MAJOR(bdev->bd_dev); - mtdnr = MINOR(bdev->bd_dev); - bdput(bdev); - - if (major == MTD_BLOCK_MAJOR) - return mtd_get_sb_by_nr(fc, mtdnr, fill_super); + if (MAJOR(dev) == MTD_BLOCK_MAJOR) + return mtd_get_sb_by_nr(fc, MINOR(dev), fill_super); #endif /* CONFIG_BLOCK */ diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 7143d03f0e02..b0cb5b95e892 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -133,10 +133,10 @@ static int fd_configure_device(struct se_device *dev) */ inode = file->f_mapping->host; if (S_ISBLK(inode->i_mode)) { - struct request_queue *q = bdev_get_queue(inode->i_bdev); + struct request_queue *q = bdev_get_queue(I_BDEV(inode)); unsigned long long dev_size; - fd_dev->fd_block_size = bdev_logical_block_size(inode->i_bdev); + fd_dev->fd_block_size = bdev_logical_block_size(I_BDEV(inode)); /* * Determine the number of bytes from i_size_read() minus * one (1) logical sector from underlying struct block_device @@ -559,7 +559,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) if (S_ISBLK(inode->i_mode)) { /* The backend is block device, use discard */ - struct block_device *bdev = inode->i_bdev; + struct block_device *bdev = I_BDEV(inode); struct se_device *dev = cmd->se_dev; ret = blkdev_issue_discard(bdev, diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c index f7e6c42558eb..b859a158a414 100644 --- a/drivers/usb/gadget/function/storage_common.c +++ b/drivers/usb/gadget/function/storage_common.c @@ -204,7 +204,7 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (!(filp->f_mode & FMODE_WRITE)) ro = 1; - inode = file_inode(filp); + inode = filp->f_mapping->host; if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) { LINFO(curlun, "invalid file type: %s\n", filename); goto out; @@ -221,7 +221,7 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (!(filp->f_mode & FMODE_CAN_WRITE)) ro = 1; - size = i_size_read(inode->i_mapping->host); + size = i_size_read(inode); if (size < 0) { LINFO(curlun, "unable to find file size: %s\n", filename); rc = (int) size; @@ -231,8 +231,8 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (curlun->cdrom) { blksize = 2048; blkbits = 11; - } else if (inode->i_bdev) { - blksize = bdev_logical_block_size(inode->i_bdev); + } else if (S_ISBLK(inode->i_mode)) { + blksize = bdev_logical_block_size(I_BDEV(inode)); blkbits = blksize_bits(blksize); } else { blksize = 512; diff --git a/fs/block_dev.c b/fs/block_dev.c index 2b8c0586314f..6d6e4d50834c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -883,7 +883,6 @@ static struct block_device *bdget(dev_t dev) bdev->bd_dev = dev; inode->i_mode = S_IFBLK; inode->i_rdev = dev; - inode->i_bdev = bdev; inode->i_data.a_ops = &def_blk_aops; mapping_set_gfp_mask(&inode->i_data, GFP_USER); unlock_new_inode(inode); @@ -928,67 +927,8 @@ void bdput(struct block_device *bdev) { iput(bdev->bd_inode); } - EXPORT_SYMBOL(bdput); -static struct block_device *bd_acquire(struct inode *inode) -{ - struct block_device *bdev; - - spin_lock(&bdev_lock); - bdev = inode->i_bdev; - if (bdev && !inode_unhashed(bdev->bd_inode)) { - bdgrab(bdev); - spin_unlock(&bdev_lock); - return bdev; - } - spin_unlock(&bdev_lock); - - /* - * i_bdev references block device inode that was already shut down - * (corresponding device got removed). Remove the reference and look - * up block device inode again just in case new device got - * reestablished under the same device number. - */ - if (bdev) - bd_forget(inode); - - bdev = bdget(inode->i_rdev); - if (bdev) { - spin_lock(&bdev_lock); - if (!inode->i_bdev) { - /* - * We take an additional reference to bd_inode, - * and it's released in clear_inode() of inode. - * So, we can access it via ->i_mapping always - * without igrab(). - */ - bdgrab(bdev); - inode->i_bdev = bdev; - inode->i_mapping = bdev->bd_inode->i_mapping; - } - spin_unlock(&bdev_lock); - } - return bdev; -} - -/* Call when you free inode */ - -void bd_forget(struct inode *inode) -{ - struct block_device *bdev = NULL; - - spin_lock(&bdev_lock); - if (!sb_is_blkdev_sb(inode->i_sb)) - bdev = inode->i_bdev; - inode->i_bdev = NULL; - inode->i_mapping = &inode->i_data; - spin_unlock(&bdev_lock); - - if (bdev) - bdput(bdev); -} - /** * bd_may_claim - test whether a block device can be claimed * @bdev: block device of interest @@ -1497,38 +1437,45 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk, } /** - * blkdev_get - open a block device - * @bdev: block_device to open + * blkdev_get_by_dev - open a block device by device number + * @dev: device number of block device to open * @mode: FMODE_* mask * @holder: exclusive holder identifier * - * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is - * open with exclusive access. Specifying %FMODE_EXCL with %NULL - * @holder is invalid. Exclusive opens may nest for the same @holder. + * Open the block device described by device number @dev. If @mode includes + * %FMODE_EXCL, the block device is opened with exclusive access. Specifying + * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for + * the same @holder. * - * On success, the reference count of @bdev is unchanged. On failure, - * @bdev is put. + * Use this interface ONLY if you really do not have anything better - i.e. when + * you are behind a truly sucky interface and all you are given is a device + * number. Everything else should use blkdev_get_by_path(). * * CONTEXT: * Might sleep. * * RETURNS: - * 0 on success, -errno on failure. + * Reference to the block_device on success, ERR_PTR(-errno) on failure. */ -static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) +struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) { struct block_device *claiming; bool unblock_events = true; + struct block_device *bdev; struct gendisk *disk; int partno; int ret; ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, - imajor(bdev->bd_inode), iminor(bdev->bd_inode), + MAJOR(dev), MINOR(dev), ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) | ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0)); if (ret) - goto bdput; + return ERR_PTR(ret); + + bdev = bdget(dev); + if (!bdev) + return ERR_PTR(-ENOMEM); /* * If we lost a race with 'disk' being deleted, try again. See md.c. @@ -1589,10 +1536,13 @@ put_disk: if (ret == -ERESTARTSYS) goto retry; bdput: - if (ret) + if (ret) { bdput(bdev); - return ret; + return ERR_PTR(ret); + } + return bdev; } +EXPORT_SYMBOL(blkdev_get_by_dev); /** * blkdev_get_by_path - open a block device by name @@ -1600,32 +1550,30 @@ bdput: * @mode: FMODE_* mask * @holder: exclusive holder identifier * - * Open the blockdevice described by the device file at @path. @mode - * and @holder are identical to blkdev_get(). - * - * On success, the returned block_device has reference count of one. + * Open the block device described by the device file at @path. If @mode + * includes %FMODE_EXCL, the block device is opened with exclusive access. + * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may + * nest for the same @holder. * * CONTEXT: * Might sleep. * * RETURNS: - * Pointer to block_device on success, ERR_PTR(-errno) on failure. + * Reference to the block_device on success, ERR_PTR(-errno) on failure. */ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder) { struct block_device *bdev; - int err; - - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) - return bdev; + dev_t dev; + int error; - err = blkdev_get(bdev, mode, holder); - if (err) - return ERR_PTR(err); + error = lookup_bdev(path, &dev); + if (error) + return ERR_PTR(error); - if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { + bdev = blkdev_get_by_dev(dev, mode, holder); + if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { blkdev_put(bdev, mode); return ERR_PTR(-EACCES); } @@ -1634,45 +1582,6 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, } EXPORT_SYMBOL(blkdev_get_by_path); -/** - * blkdev_get_by_dev - open a block device by device number - * @dev: device number of block device to open - * @mode: FMODE_* mask - * @holder: exclusive holder identifier - * - * Open the blockdevice described by device number @dev. @mode and - * @holder are identical to blkdev_get(). - * - * Use it ONLY if you really do not have anything better - i.e. when - * you are behind a truly sucky interface and all you are given is a - * device number. _Never_ to be used for internal purposes. If you - * ever need it - reconsider your API. - * - * On success, the returned block_device has reference count of one. - * - * CONTEXT: - * Might sleep. - * - * RETURNS: - * Pointer to block_device on success, ERR_PTR(-errno) on failure. - */ -struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) -{ - struct block_device *bdev; - int err; - - bdev = bdget(dev); - if (!bdev) - return ERR_PTR(-ENOMEM); - - err = blkdev_get(bdev, mode, holder); - if (err) - return ERR_PTR(err); - - return bdev; -} -EXPORT_SYMBOL(blkdev_get_by_dev); - static int blkdev_open(struct inode * inode, struct file * filp) { struct block_device *bdev; @@ -1694,14 +1603,12 @@ static int blkdev_open(struct inode * inode, struct file * filp) if ((filp->f_flags & O_ACCMODE) == 3) filp->f_mode |= FMODE_WRITE_IOCTL; - bdev = bd_acquire(inode); - if (bdev == NULL) - return -ENOMEM; - + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); filp->f_mapping = bdev->bd_inode->i_mapping; filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); - - return blkdev_get(bdev, filp->f_mode, filp); + return 0; } static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) @@ -2010,37 +1917,32 @@ const struct file_operations def_blk_fops = { * namespace if possible and return it. Return ERR_PTR(error) * otherwise. */ -struct block_device *lookup_bdev(const char *pathname) +int lookup_bdev(const char *pathname, dev_t *dev) { - struct block_device *bdev; struct inode *inode; struct path path; int error; if (!pathname || !*pathname) - return ERR_PTR(-EINVAL); + return -EINVAL; error = kern_path(pathname, LOOKUP_FOLLOW, &path); if (error) - return ERR_PTR(error); + return error; inode = d_backing_inode(path.dentry); error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) - goto fail; + goto out_path_put; error = -EACCES; if (!may_open_dev(&path)) - goto fail; - error = -ENOMEM; - bdev = bd_acquire(inode); - if (!bdev) - goto fail; -out: + goto out_path_put; + + *dev = inode->i_rdev; + error = 0; +out_path_put: path_put(&path); - return bdev; -fail: - bdev = ERR_PTR(error); - goto out; + return error; } EXPORT_SYMBOL(lookup_bdev); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a6406b3b8c2b..fbc4b58228f7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -929,16 +929,16 @@ static noinline struct btrfs_device *device_list_add(const char *path, * make sure it's the same device if the device is mounted */ if (device->bdev) { - struct block_device *path_bdev; + int error; + dev_t path_dev; - path_bdev = lookup_bdev(path); - if (IS_ERR(path_bdev)) { + error = lookup_bdev(path, &path_dev); + if (error) { mutex_unlock(&fs_devices->device_list_mutex); - return ERR_CAST(path_bdev); + return ERR_PTR(error); } - if (device->bdev != path_bdev) { - bdput(path_bdev); + if (device->bdev->bd_dev != path_dev) { mutex_unlock(&fs_devices->device_list_mutex); btrfs_warn_in_rcu(device->fs_info, "duplicate device %s devid %llu generation %llu scanned by %s (%d)", @@ -947,7 +947,6 @@ static noinline struct btrfs_device *device_list_add(const char *path, task_pid_nr(current)); return ERR_PTR(-EEXIST); } - bdput(path_bdev); btrfs_info_in_rcu(device->fs_info, "devid %llu device path %s changed to %s scanned by %s (%d)", devid, rcu_str_deref(device->name), diff --git a/fs/inode.c b/fs/inode.c index 9d78c37b00b8..cb008acf0efd 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -155,7 +155,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_bytes = 0; inode->i_generation = 0; inode->i_pipe = NULL; - inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_link = NULL; inode->i_dir_seq = 0; @@ -580,8 +579,6 @@ static void evict(struct inode *inode) truncate_inode_pages_final(&inode->i_data); clear_inode(inode); } - if (S_ISBLK(inode->i_mode) && inode->i_bdev) - bd_forget(inode); if (S_ISCHR(inode->i_mode) && inode->i_cdev) cd_forget(inode); diff --git a/fs/internal.h b/fs/internal.h index 47be21dfeebe..53f890446e75 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -25,7 +25,6 @@ extern void __init bdev_cache_init(void); extern int __sync_blockdev(struct block_device *bdev, int wait); void iterate_bdevs(void (*)(struct block_device *, void *), void *); void emergency_thaw_bdev(struct super_block *sb); -void bd_forget(struct inode *inode); #else static inline void bdev_cache_init(void) { @@ -43,9 +42,6 @@ static inline int emergency_thaw_bdev(struct super_block *sb) { return 0; } -static inline void bd_forget(struct inode *inode) -{ -} #endif /* CONFIG_BLOCK */ /* @@ -114,8 +110,7 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); */ extern int reconfigure_super(struct fs_context *); extern bool trylock_super(struct super_block *sb); -struct super_block *__get_super(struct block_device *bdev, bool excl); -extern struct super_block *user_get_super(dev_t); +struct super_block *user_get_super(dev_t, bool excl); void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); diff --git a/fs/io_uring.c b/fs/io_uring.c index 4ead291b2976..8f13c0417f94 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2716,11 +2716,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd) static bool io_bdev_nowait(struct block_device *bdev) { -#ifdef CONFIG_BLOCK return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); -#else - return true; -#endif } /* @@ -2733,14 +2729,16 @@ static bool io_file_supports_async(struct file *file, int rw) umode_t mode = file_inode(file)->i_mode; if (S_ISBLK(mode)) { - if (io_bdev_nowait(file->f_inode->i_bdev)) + if (IS_ENABLED(CONFIG_BLOCK) && + io_bdev_nowait(I_BDEV(file->f_mapping->host))) return true; return false; } if (S_ISCHR(mode) || S_ISSOCK(mode)) return true; if (S_ISREG(mode)) { - if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) && + if (IS_ENABLED(CONFIG_BLOCK) && + io_bdev_nowait(file->f_inode->i_sb->s_bdev) && file->f_op != &io_uring_fops) return true; return false; diff --git a/fs/pipe.c b/fs/pipe.c index 0ac197658a2d..c5989cfd564d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1342,9 +1342,8 @@ out_revert_acct: } /* - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same - * location, so checking ->i_pipe is not enough to verify that this is a - * pipe. + * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is + * not enough to verify that this is a pipe. */ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) { diff --git a/fs/quota/quota.c b/fs/quota/quota.c index f3d32b0d9008..6d16b2be5ac4 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -866,17 +866,18 @@ static bool quotactl_cmd_onoff(int cmd) static struct super_block *quotactl_block(const char __user *special, int cmd) { #ifdef CONFIG_BLOCK - struct block_device *bdev; struct super_block *sb; struct filename *tmp = getname(special); bool excl = false, thawed = false; + int error; + dev_t dev; if (IS_ERR(tmp)) return ERR_CAST(tmp); - bdev = lookup_bdev(tmp->name); + error = lookup_bdev(tmp->name, &dev); putname(tmp); - if (IS_ERR(bdev)) - return ERR_CAST(bdev); + if (error) + return ERR_PTR(error); if (quotactl_cmd_onoff(cmd)) { excl = true; @@ -886,8 +887,10 @@ static struct super_block *quotactl_block(const char __user *special, int cmd) } retry: - sb = __get_super(bdev, excl); - if (thawed && sb && sb->s_writers.frozen != SB_UNFROZEN) { + sb = user_get_super(dev, excl); + if (!sb) + return ERR_PTR(-ENODEV); + if (thawed && sb->s_writers.frozen != SB_UNFROZEN) { if (excl) up_write(&sb->s_umount); else @@ -897,10 +900,6 @@ retry: put_super(sb); goto retry; } - - bdput(bdev); - if (!sb) - return ERR_PTR(-ENODEV); return sb; #else diff --git a/fs/statfs.c b/fs/statfs.c index 59f33752c131..68cb07788750 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -235,7 +235,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user static int vfs_ustat(dev_t dev, struct kstatfs *sbuf) { - struct super_block *s = user_get_super(dev); + struct super_block *s = user_get_super(dev, false); int err; if (!s) return -EINVAL; diff --git a/fs/super.c b/fs/super.c index 343e5c1e538d..2c6cdea2ab2d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -740,7 +740,14 @@ void iterate_supers_type(struct file_system_type *type, EXPORT_SYMBOL(iterate_supers_type); -struct super_block *__get_super(struct block_device *bdev, bool excl) +/** + * get_super - get the superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device given. %NULL is returned if no match is found. + */ +struct super_block *get_super(struct block_device *bdev) { struct super_block *sb; @@ -755,17 +762,11 @@ rescan: if (sb->s_bdev == bdev) { sb->s_count++; spin_unlock(&sb_lock); - if (!excl) - down_read(&sb->s_umount); - else - down_write(&sb->s_umount); + down_read(&sb->s_umount); /* still alive? */ if (sb->s_root && (sb->s_flags & SB_BORN)) return sb; - if (!excl) - up_read(&sb->s_umount); - else - up_write(&sb->s_umount); + up_read(&sb->s_umount); /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); @@ -776,19 +777,6 @@ rescan: return NULL; } -/** - * get_super - get the superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device given. %NULL is returned if no match is found. - */ -struct super_block *get_super(struct block_device *bdev) -{ - return __get_super(bdev, false); -} -EXPORT_SYMBOL(get_super); - /** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for @@ -820,7 +808,7 @@ restart: return NULL; } -struct super_block *user_get_super(dev_t dev) +struct super_block *user_get_super(dev_t dev, bool excl) { struct super_block *sb; @@ -832,11 +820,17 @@ rescan: if (sb->s_dev == dev) { sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); + if (excl) + down_write(&sb->s_umount); + else + down_read(&sb->s_umount); /* still alive? */ if (sb->s_root && (sb->s_flags & SB_BORN)) return sb; - up_read(&sb->s_umount); + if (excl) + up_write(&sb->s_umount); + else + up_read(&sb->s_umount); /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12810a19edeb..bdd7339bcda4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1973,7 +1973,7 @@ int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); const char *bdevname(struct block_device *bdev, char *buffer); -struct block_device *lookup_bdev(const char *); +int lookup_bdev(const char *pathname, dev_t *dev); void blkdev_show(struct seq_file *seqf, off_t offset); diff --git a/include/linux/fs.h b/include/linux/fs.h index a61df0dd4f19..b0b358309657 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -696,7 +696,6 @@ struct inode { struct list_head i_devices; union { struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; unsigned i_dir_seq; -- cgit v1.2.3 From 22ae8ce8b89241c94ac00c237752c0ffa37ba5ae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 09:23:26 +0100 Subject: block: simplify bdev/disk lookup in blkdev_get To simplify block device lookup and a few other upcoming areas, make sure that we always have a struct block_device available for each disk and each partition, and only find existing block devices in bdget. The only downside of this is that each device and partition uses a little more memory. The upside will be that a lot of code can be simplified. With that all we need to look up the block device is to lookup the inode and do a few sanity checks on the gendisk, instead of the separate lookup for the gendisk. For blk-cgroup which wants to access a gendisk without opening it, a new blkdev_{get,put}_no_open low-level interface is added to replace the previous get_gendisk use. Note that the change to look up block device directly instead of the two step lookup using struct gendisk causes a subtile change in behavior: accessing a non-existing partition on an existing block device can now cause a call to request_module. That call is harmless, and in practice no recent system will access these nodes as they aren't created by udev and static /dev/ setups are unusual. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 42 ++++----- block/blk-iocost.c | 36 ++++---- block/blk.h | 2 +- block/genhd.c | 210 ++++++--------------------------------------- block/partitions/core.c | 29 ++++--- fs/block_dev.c | 177 ++++++++++++++++++++++---------------- include/linux/blk-cgroup.h | 4 +- include/linux/blkdev.h | 6 ++ include/linux/genhd.h | 7 +- 9 files changed, 194 insertions(+), 319 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c68bdf58c9a6..ad02289a4f7f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -556,22 +556,22 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg, } /** - * blkg_conf_prep - parse and prepare for per-blkg config update + * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update * @inputp: input string pointer * * Parse the device node prefix part, MAJ:MIN, of per-blkg config update - * from @input and get and return the matching gendisk. *@inputp is + * from @input and get and return the matching bdev. *@inputp is * updated to point past the device node prefix. Returns an ERR_PTR() * value on error. * * Use this function iff blkg_conf_prep() can't be used for some reason. */ -struct gendisk *blkcg_conf_get_disk(char **inputp) +struct block_device *blkcg_conf_open_bdev(char **inputp) { char *input = *inputp; unsigned int major, minor; - struct gendisk *disk; - int key_len, part; + struct block_device *bdev; + int key_len; if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2) return ERR_PTR(-EINVAL); @@ -581,16 +581,16 @@ struct gendisk *blkcg_conf_get_disk(char **inputp) return ERR_PTR(-EINVAL); input = skip_spaces(input); - disk = get_gendisk(MKDEV(major, minor), &part); - if (!disk) + bdev = blkdev_get_no_open(MKDEV(major, minor)); + if (!bdev) return ERR_PTR(-ENODEV); - if (part) { - put_disk_and_module(disk); + if (bdev_is_partition(bdev)) { + blkdev_put_no_open(bdev); return ERR_PTR(-ENODEV); } *inputp = input; - return disk; + return bdev; } /** @@ -607,18 +607,18 @@ struct gendisk *blkcg_conf_get_disk(char **inputp) */ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx) - __acquires(rcu) __acquires(&disk->queue->queue_lock) + __acquires(rcu) __acquires(&bdev->bd_disk->queue->queue_lock) { - struct gendisk *disk; + struct block_device *bdev; struct request_queue *q; struct blkcg_gq *blkg; int ret; - disk = blkcg_conf_get_disk(&input); - if (IS_ERR(disk)) - return PTR_ERR(disk); + bdev = blkcg_conf_open_bdev(&input); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); - q = disk->queue; + q = bdev->bd_disk->queue; rcu_read_lock(); spin_lock_irq(&q->queue_lock); @@ -689,7 +689,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, goto success; } success: - ctx->disk = disk; + ctx->bdev = bdev; ctx->blkg = blkg; ctx->body = input; return 0; @@ -700,7 +700,7 @@ fail_unlock: spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); fail: - put_disk_and_module(disk); + blkdev_put_no_open(bdev); /* * If queue was bypassing, we should retry. Do so after a * short msleep(). It isn't strictly necessary but queue @@ -723,11 +723,11 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep); * with blkg_conf_prep(). */ void blkg_conf_finish(struct blkg_conf_ctx *ctx) - __releases(&ctx->disk->queue->queue_lock) __releases(rcu) + __releases(&ctx->bdev->bd_disk->queue->queue_lock) __releases(rcu) { - spin_unlock_irq(&ctx->disk->queue->queue_lock); + spin_unlock_irq(&ctx->bdev->bd_disk->queue->queue_lock); rcu_read_unlock(); - put_disk_and_module(ctx->disk); + blkdev_put_no_open(ctx->bdev); } EXPORT_SYMBOL_GPL(blkg_conf_finish); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index bbe86d1199dc..8e20fe4bddec 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3120,23 +3120,23 @@ static const match_table_t qos_tokens = { static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, size_t nbytes, loff_t off) { - struct gendisk *disk; + struct block_device *bdev; struct ioc *ioc; u32 qos[NR_QOS_PARAMS]; bool enable, user; char *p; int ret; - disk = blkcg_conf_get_disk(&input); - if (IS_ERR(disk)) - return PTR_ERR(disk); + bdev = blkcg_conf_open_bdev(&input); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); if (!ioc) { - ret = blk_iocost_init(disk->queue); + ret = blk_iocost_init(bdev->bd_disk->queue); if (ret) goto err; - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); } spin_lock_irq(&ioc->lock); @@ -3231,12 +3231,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return nbytes; einval: ret = -EINVAL; err: - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return ret; } @@ -3287,23 +3287,23 @@ static const match_table_t i_lcoef_tokens = { static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, size_t nbytes, loff_t off) { - struct gendisk *disk; + struct block_device *bdev; struct ioc *ioc; u64 u[NR_I_LCOEFS]; bool user; char *p; int ret; - disk = blkcg_conf_get_disk(&input); - if (IS_ERR(disk)) - return PTR_ERR(disk); + bdev = blkcg_conf_open_bdev(&input); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); if (!ioc) { - ret = blk_iocost_init(disk->queue); + ret = blk_iocost_init(bdev->bd_disk->queue); if (ret) goto err; - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); } spin_lock_irq(&ioc->lock); @@ -3356,13 +3356,13 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return nbytes; einval: ret = -EINVAL; err: - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return ret; } diff --git a/block/blk.h b/block/blk.h index dfab98465db9..c4839abcfa27 100644 --- a/block/blk.h +++ b/block/blk.h @@ -352,7 +352,6 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); int blk_alloc_devt(struct hd_struct *part, dev_t *devt); void blk_free_devt(dev_t devt); -void blk_invalidate_devt(dev_t devt); char *disk_name(struct gendisk *hd, int partno, char *buf); #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 @@ -384,6 +383,7 @@ static inline void hd_free_part(struct hd_struct *part) { free_percpu(part->dkstats); kfree(part->info); + bdput(part->bdev); percpu_ref_exit(&part->ref); } diff --git a/block/genhd.c b/block/genhd.c index f46e89226fdf..bf8fa82f135f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -27,17 +27,11 @@ static struct kobject *block_depr; -static DEFINE_XARRAY(bdev_map); -static DEFINE_MUTEX(bdev_map_lock); +DECLARE_RWSEM(bdev_lookup_sem); /* for extended dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) - -/* For extended devt allocation. ext_devt_lock prevents look up - * results from going away underneath its user. - */ -static DEFINE_SPINLOCK(ext_devt_lock); -static DEFINE_IDR(ext_devt_idr); +static DEFINE_IDA(ext_devt_ida); static void disk_check_events(struct disk_events *ev, unsigned int *clearing_ptr); @@ -580,14 +574,7 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) return 0; } - /* allocate ext devt */ - idr_preload(GFP_KERNEL); - - spin_lock_bh(&ext_devt_lock); - idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); - spin_unlock_bh(&ext_devt_lock); - - idr_preload_end(); + idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL); if (idx < 0) return idx == -ENOSPC ? -EBUSY : idx; @@ -606,26 +593,8 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) */ void blk_free_devt(dev_t devt) { - if (devt == MKDEV(0, 0)) - return; - - if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - spin_lock_bh(&ext_devt_lock); - idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - spin_unlock_bh(&ext_devt_lock); - } -} - -/* - * We invalidate devt by assigning NULL pointer for devt in idr. - */ -void blk_invalidate_devt(dev_t devt) -{ - if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - spin_lock_bh(&ext_devt_lock); - idr_replace(&ext_devt_idr, NULL, blk_mangle_minor(MINOR(devt))); - spin_unlock_bh(&ext_devt_lock); - } + if (MAJOR(devt) == BLOCK_EXT_MAJOR) + ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt))); } static char *bdevt_str(dev_t devt, char *buf) @@ -640,28 +609,6 @@ static char *bdevt_str(dev_t devt, char *buf) return buf; } -static void blk_register_region(struct gendisk *disk) -{ - int i; - - mutex_lock(&bdev_map_lock); - for (i = 0; i < disk->minors; i++) { - if (xa_insert(&bdev_map, disk_devt(disk) + i, disk, GFP_KERNEL)) - WARN_ON_ONCE(1); - } - mutex_unlock(&bdev_map_lock); -} - -static void blk_unregister_region(struct gendisk *disk) -{ - int i; - - mutex_lock(&bdev_map_lock); - for (i = 0; i < disk->minors; i++) - xa_erase(&bdev_map, disk_devt(disk) + i); - mutex_unlock(&bdev_map_lock); -} - static void disk_scan_partitions(struct gendisk *disk) { struct block_device *bdev; @@ -805,7 +752,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt)); WARN_ON(ret); bdi_set_owner(bdi, dev); - blk_register_region(disk); + bdev_add(disk->part0.bdev, devt); } register_disk(parent, disk, groups); if (register_queue) @@ -847,8 +794,8 @@ static void invalidate_partition(struct gendisk *disk, int partno) __invalidate_device(bdev, true); /* - * Unhash the bdev inode for this device so that it gets evicted as soon - * as last inode reference is dropped. + * Unhash the bdev inode for this device so that it can't be looked + * up any more even if openers still hold references to it. */ remove_inode_hash(bdev->bd_inode); bdput(bdev); @@ -890,7 +837,8 @@ void del_gendisk(struct gendisk *disk) * Block lookups of the disk until all bdevs are unhashed and the * disk is marked as dead (GENHD_FL_UP cleared). */ - down_write(&disk->lookup_sem); + down_write(&bdev_lookup_sem); + /* invalidate stuff */ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); @@ -903,7 +851,7 @@ void del_gendisk(struct gendisk *disk) invalidate_partition(disk, 0); set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; - up_write(&disk->lookup_sem); + up_write(&bdev_lookup_sem); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); @@ -916,16 +864,6 @@ void del_gendisk(struct gendisk *disk) } blk_unregister_queue(disk); - - if (!(disk->flags & GENHD_FL_HIDDEN)) - blk_unregister_region(disk); - /* - * Remove gendisk pointer from idr so that it cannot be looked up - * while RCU period before freeing gendisk is running to prevent - * use-after-free issues. Note that the device number stays - * "in-use" until we really free the gendisk. - */ - blk_invalidate_devt(disk_devt(disk)); kobject_put(disk->part0.holder_dir); kobject_put(disk->slave_dir); @@ -964,7 +902,7 @@ static ssize_t disk_badblocks_store(struct device *dev, return badblocks_store(disk->bb, page, len, 0); } -static void request_gendisk_module(dev_t devt) +void blk_request_module(dev_t devt) { unsigned int major = MAJOR(devt); struct blk_major_name **n; @@ -984,84 +922,6 @@ static void request_gendisk_module(dev_t devt) request_module("block-major-%d", MAJOR(devt)); } -static bool get_disk_and_module(struct gendisk *disk) -{ - struct module *owner; - - if (!disk->fops) - return false; - owner = disk->fops->owner; - if (owner && !try_module_get(owner)) - return false; - if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj)) { - module_put(owner); - return false; - } - return true; - -} - -/** - * get_gendisk - get partitioning information for a given device - * @devt: device to get partitioning information for - * @partno: returned partition index - * - * This function gets the structure containing partitioning - * information for the given device @devt. - * - * Context: can sleep - */ -struct gendisk *get_gendisk(dev_t devt, int *partno) -{ - struct gendisk *disk = NULL; - - might_sleep(); - - if (MAJOR(devt) != BLOCK_EXT_MAJOR) { - mutex_lock(&bdev_map_lock); - disk = xa_load(&bdev_map, devt); - if (!disk) { - mutex_unlock(&bdev_map_lock); - request_gendisk_module(devt); - mutex_lock(&bdev_map_lock); - disk = xa_load(&bdev_map, devt); - } - if (disk && !get_disk_and_module(disk)) - disk = NULL; - if (disk) - *partno = devt - disk_devt(disk); - mutex_unlock(&bdev_map_lock); - } else { - struct hd_struct *part; - - spin_lock_bh(&ext_devt_lock); - part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - if (part && get_disk_and_module(part_to_disk(part))) { - *partno = part->partno; - disk = part_to_disk(part); - } - spin_unlock_bh(&ext_devt_lock); - } - - if (!disk) - return NULL; - - /* - * Synchronize with del_gendisk() to not return disk that is being - * destroyed. - */ - down_read(&disk->lookup_sem); - if (unlikely((disk->flags & GENHD_FL_HIDDEN) || - !(disk->flags & GENHD_FL_UP))) { - up_read(&disk->lookup_sem); - put_disk_and_module(disk); - disk = NULL; - } else { - up_read(&disk->lookup_sem); - } - return disk; -} - /** * bdget_disk - do bdget() by gendisk and partition number * @disk: gendisk of interest @@ -1559,11 +1419,6 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno) * * This function releases all allocated resources of the gendisk. * - * The struct gendisk refcount is incremented with get_gendisk() or - * get_disk_and_module(), and its refcount is decremented with - * put_disk_and_module() or put_disk(). Once the refcount reaches 0 this - * function is called. - * * Drivers which used __device_add_disk() have a gendisk with a request_queue * assigned. Since the request_queue sits on top of the gendisk for these * drivers we also call blk_put_queue() for them, and we expect the @@ -1748,16 +1603,17 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) if (!disk) return NULL; + disk->part0.bdev = bdev_alloc(disk, 0); + if (!disk->part0.bdev) + goto out_free_disk; + disk->part0.dkstats = alloc_percpu(struct disk_stats); if (!disk->part0.dkstats) - goto out_free_disk; + goto out_bdput; - init_rwsem(&disk->lookup_sem); disk->node_id = node_id; - if (disk_expand_part_tbl(disk, 0)) { - free_percpu(disk->part0.dkstats); - goto out_free_disk; - } + if (disk_expand_part_tbl(disk, 0)) + goto out_free_bdstats; ptbl = rcu_dereference_protected(disk->part_tbl, 1); rcu_assign_pointer(ptbl->part[0], &disk->part0); @@ -1773,7 +1629,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) */ hd_sects_seq_init(&disk->part0); if (hd_ref_init(&disk->part0)) - goto out_free_part0; + goto out_free_bdstats; disk->minors = minors; rand_initialize_disk(disk); @@ -1782,8 +1638,10 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) device_initialize(disk_to_dev(disk)); return disk; -out_free_part0: - hd_free_part(&disk->part0); +out_free_bdstats: + free_percpu(disk->part0.dkstats); +out_bdput: + bdput(disk->part0.bdev); out_free_disk: kfree(disk); return NULL; @@ -1807,26 +1665,6 @@ void put_disk(struct gendisk *disk) } EXPORT_SYMBOL(put_disk); -/** - * put_disk_and_module - decrements the module and gendisk refcount - * @disk: the struct gendisk to decrement the refcount for - * - * This is a counterpart of get_disk_and_module() and thus also of - * get_gendisk(). - * - * Context: Any context, but the last reference must not be dropped from - * atomic context. - */ -void put_disk_and_module(struct gendisk *disk) -{ - if (disk) { - struct module *owner = disk->fops->owner; - - put_disk(disk); - module_put(owner); - } -} - static void set_disk_ro_uevent(struct gendisk *gd, int ro) { char event[] = "DISK_RO=1"; diff --git a/block/partitions/core.c b/block/partitions/core.c index a02e22411594..696bd9ff63c6 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -340,12 +340,11 @@ void delete_partition(struct hd_struct *part) device_del(part_to_dev(part)); /* - * Remove gendisk pointer from idr so that it cannot be looked up - * while RCU period before freeing gendisk is running to prevent - * use-after-free issues. Note that the device number stays - * "in-use" until we really free the gendisk. + * Remove the block device from the inode hash, so that it cannot be + * looked up any more even when openers still hold references. */ - blk_invalidate_devt(part_devt(part)); + remove_inode_hash(part->bdev->bd_inode); + percpu_ref_kill(&part->ref); } @@ -368,6 +367,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, dev_t devt = MKDEV(0, 0); struct device *ddev = disk_to_dev(disk); struct device *pdev; + struct block_device *bdev; struct disk_part_tbl *ptbl; const char *dname; int err; @@ -402,11 +402,15 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, if (!p) return ERR_PTR(-EBUSY); + err = -ENOMEM; p->dkstats = alloc_percpu(struct disk_stats); - if (!p->dkstats) { - err = -ENOMEM; + if (!p->dkstats) goto out_free; - } + + bdev = bdev_alloc(disk, partno); + if (!bdev) + goto out_free_stats; + p->bdev = bdev; hd_sects_seq_init(p); pdev = part_to_dev(p); @@ -420,10 +424,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, struct partition_meta_info *pinfo; pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id); - if (!pinfo) { - err = -ENOMEM; - goto out_free_stats; - } + if (!pinfo) + goto out_bdput; memcpy(pinfo, info, sizeof(*info)); p->info = pinfo; } @@ -470,6 +472,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, } /* everything is up and running, commence */ + bdev_add(bdev, devt); rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk suppresses it */ @@ -479,6 +482,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, out_free_info: kfree(p->info); +out_bdput: + bdput(bdev); out_free_stats: free_percpu(p->dkstats); out_free: diff --git a/fs/block_dev.c b/fs/block_dev.c index 6d6e4d50834c..b350ed3af83b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -863,31 +863,46 @@ void __init bdev_cache_init(void) blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ } -static struct block_device *bdget(dev_t dev) +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) { struct block_device *bdev; struct inode *inode; - inode = iget_locked(blockdev_superblock, dev); + inode = new_inode(blockdev_superblock); if (!inode) return NULL; + inode->i_mode = S_IFBLK; + inode->i_rdev = 0; + inode->i_data.a_ops = &def_blk_aops; + mapping_set_gfp_mask(&inode->i_data, GFP_USER); + + bdev = I_BDEV(inode); + spin_lock_init(&bdev->bd_size_lock); + bdev->bd_disk = disk; + bdev->bd_partno = partno; + bdev->bd_contains = NULL; + bdev->bd_super = NULL; + bdev->bd_inode = inode; + bdev->bd_part_count = 0; + return bdev; +} - bdev = &BDEV_I(inode)->bdev; +void bdev_add(struct block_device *bdev, dev_t dev) +{ + bdev->bd_dev = dev; + bdev->bd_inode->i_rdev = dev; + bdev->bd_inode->i_ino = dev; + insert_inode_hash(bdev->bd_inode); +} - if (inode->i_state & I_NEW) { - spin_lock_init(&bdev->bd_size_lock); - bdev->bd_contains = NULL; - bdev->bd_super = NULL; - bdev->bd_inode = inode; - bdev->bd_part_count = 0; - bdev->bd_dev = dev; - inode->i_mode = S_IFBLK; - inode->i_rdev = dev; - inode->i_data.a_ops = &def_blk_aops; - mapping_set_gfp_mask(&inode->i_data, GFP_USER); - unlock_new_inode(inode); - } - return bdev; +static struct block_device *bdget(dev_t dev) +{ + struct inode *inode; + + inode = ilookup(blockdev_superblock, dev); + if (!inode) + return NULL; + return &BDEV_I(inode)->bdev; } /** @@ -1004,27 +1019,6 @@ retry: } EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */ -static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) -{ - struct gendisk *disk = get_gendisk(bdev->bd_dev, partno); - - if (!disk) - return NULL; - /* - * Now that we hold gendisk reference we make sure bdev we looked up is - * not stale. If it is, it means device got removed and created before - * we looked up gendisk and we fail open in such case. Associating - * unhashed bdev with newly created gendisk could lead to two bdevs - * (and thus two independent caches) being associated with one device - * which is bad. - */ - if (inode_unhashed(bdev->bd_inode)) { - put_disk_and_module(disk); - return NULL; - } - return disk; -} - static void bd_clear_claiming(struct block_device *whole, void *holder) { lockdep_assert_held(&bdev_lock); @@ -1347,19 +1341,17 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed); * mutex_lock(part->bd_mutex) * mutex_lock_nested(whole->bd_mutex, 1) */ -static int __blkdev_get(struct block_device *bdev, struct gendisk *disk, - int partno, fmode_t mode) +static int __blkdev_get(struct block_device *bdev, fmode_t mode) { + struct gendisk *disk = bdev->bd_disk; int ret; if (!bdev->bd_openers) { - bdev->bd_disk = disk; bdev->bd_contains = bdev; - bdev->bd_partno = partno; - if (!partno) { + if (!bdev->bd_partno) { ret = -ENXIO; - bdev->bd_part = disk_get_part(disk, partno); + bdev->bd_part = disk_get_part(disk, 0); if (!bdev->bd_part) goto out_clear; @@ -1388,7 +1380,7 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk, struct block_device *whole = bdget_disk(disk, 0); mutex_lock_nested(&whole->bd_mutex, 1); - ret = __blkdev_get(whole, disk, 0, mode); + ret = __blkdev_get(whole, mode); if (ret) { mutex_unlock(&whole->bd_mutex); bdput(whole); @@ -1398,7 +1390,7 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk, mutex_unlock(&whole->bd_mutex); bdev->bd_contains = whole; - bdev->bd_part = disk_get_part(disk, partno); + bdev->bd_part = disk_get_part(disk, bdev->bd_partno); if (!(disk->flags & GENHD_FL_UP) || !bdev->bd_part || !bdev->bd_part->nr_sects) { __blkdev_put(whole, mode, 1); @@ -1430,12 +1422,53 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk, out_clear: disk_put_part(bdev->bd_part); - bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_contains = NULL; return ret; } +struct block_device *blkdev_get_no_open(dev_t dev) +{ + struct block_device *bdev; + struct gendisk *disk; + + down_read(&bdev_lookup_sem); + bdev = bdget(dev); + if (!bdev) { + up_read(&bdev_lookup_sem); + blk_request_module(dev); + down_read(&bdev_lookup_sem); + + bdev = bdget(dev); + if (!bdev) + goto unlock; + } + + disk = bdev->bd_disk; + if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj)) + goto bdput; + if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP) + goto put_disk; + if (!try_module_get(bdev->bd_disk->fops->owner)) + goto put_disk; + up_read(&bdev_lookup_sem); + return bdev; +put_disk: + put_disk(disk); +bdput: + bdput(bdev); +unlock: + up_read(&bdev_lookup_sem); + return NULL; +} + +void blkdev_put_no_open(struct block_device *bdev) +{ + module_put(bdev->bd_disk->fops->owner); + put_disk(bdev->bd_disk); + bdput(bdev); +} + /** * blkdev_get_by_dev - open a block device by device number * @dev: device number of block device to open @@ -1463,7 +1496,6 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) bool unblock_events = true; struct block_device *bdev; struct gendisk *disk; - int partno; int ret; ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, @@ -1473,18 +1505,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) if (ret) return ERR_PTR(ret); - bdev = bdget(dev); - if (!bdev) - return ERR_PTR(-ENOMEM); - /* * If we lost a race with 'disk' being deleted, try again. See md.c. */ retry: - ret = -ENXIO; - disk = bdev_get_gendisk(bdev, &partno); - if (!disk) - goto bdput; + bdev = blkdev_get_no_open(dev); + if (!bdev) + return ERR_PTR(-ENXIO); + disk = bdev->bd_disk; if (mode & FMODE_EXCL) { WARN_ON_ONCE(!holder); @@ -1492,7 +1520,7 @@ retry: ret = -ENOMEM; claiming = bdget_disk(disk, 0); if (!claiming) - goto put_disk; + goto put_blkdev; ret = bd_prepare_to_claim(bdev, claiming, holder); if (ret) goto put_claiming; @@ -1501,12 +1529,10 @@ retry: disk_block_events(disk); mutex_lock(&bdev->bd_mutex); - ret =__blkdev_get(bdev, disk, partno, mode); - if (!(mode & FMODE_EXCL)) { - ; /* nothing to do here */ - } else if (ret) { - bd_abort_claiming(bdev, claiming, holder); - } else { + ret =__blkdev_get(bdev, mode); + if (ret) + goto abort_claiming; + if (mode & FMODE_EXCL) { bd_finish_claiming(bdev, claiming, holder); /* @@ -1526,21 +1552,23 @@ retry: if (unblock_events) disk_unblock_events(disk); + if (mode & FMODE_EXCL) + bdput(claiming); + return bdev; +abort_claiming: + if (mode & FMODE_EXCL) + bd_abort_claiming(bdev, claiming, holder); + mutex_unlock(&bdev->bd_mutex); + disk_unblock_events(disk); put_claiming: if (mode & FMODE_EXCL) bdput(claiming); -put_disk: - if (ret) - put_disk_and_module(disk); +put_blkdev: + blkdev_put_no_open(bdev); if (ret == -ERESTARTSYS) goto retry; -bdput: - if (ret) { - bdput(bdev); - return ERR_PTR(ret); - } - return bdev; + return ERR_PTR(ret); } EXPORT_SYMBOL(blkdev_get_by_dev); @@ -1641,7 +1669,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) disk_put_part(bdev->bd_part); bdev->bd_part = NULL; - bdev->bd_disk = NULL; if (bdev_is_partition(bdev)) victim = bdev->bd_contains; bdev->bd_contains = NULL; @@ -1699,12 +1726,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * from userland - e.g. eject(1). */ disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE); - mutex_unlock(&bdev->bd_mutex); __blkdev_put(bdev, mode, 0); - bdput(bdev); - put_disk_and_module(disk); + blkdev_put_no_open(bdev); } EXPORT_SYMBOL(blkdev_put); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c8fc9792ac77..b9f3c246c3c9 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -197,12 +197,12 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); struct blkg_conf_ctx { - struct gendisk *disk; + struct block_device *bdev; struct blkcg_gq *blkg; char *body; }; -struct gendisk *blkcg_conf_get_disk(char **inputp); +struct block_device *blkcg_conf_open_bdev(char **inputp); int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bdd7339bcda4..5d48b92f5e43 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1994,6 +1994,12 @@ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); +/* just for blk-cgroup, don't use elsewhere */ +struct block_device *blkdev_get_no_open(dev_t dev); +void blkdev_put_no_open(struct block_device *bdev); + +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); +void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ca5e356084c3..42a51653c730 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -65,6 +65,7 @@ struct hd_struct { struct disk_stats __percpu *dkstats; struct percpu_ref ref; + struct block_device *bdev; struct device __dev; struct kobject *holder_dir; int policy, partno; @@ -193,7 +194,6 @@ struct gendisk { int flags; unsigned long state; #define GD_NEED_PART_SCAN 0 - struct rw_semaphore lookup_sem; struct kobject *slave_dir; struct timer_rand_state *random; @@ -300,7 +300,6 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk) } extern void del_gendisk(struct gendisk *gp); -extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_disk_ro(struct gendisk *disk, int flag); @@ -338,7 +337,6 @@ int blk_drop_partitions(struct block_device *bdev); extern struct gendisk *__alloc_disk_node(int minors, int node_id); extern void put_disk(struct gendisk *disk); -extern void put_disk_and_module(struct gendisk *disk); #define alloc_disk_node(minors, node_id) \ ({ \ @@ -388,7 +386,10 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ +extern struct rw_semaphore bdev_lookup_sem; + dev_t blk_lookup_devt(const char *name, int partno); +void blk_request_module(dev_t devt); #ifdef CONFIG_BLOCK void printk_all_partitions(void); #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From a954ea812018a84d350b316c39a2be3edc4b7ca8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 13:29:55 +0100 Subject: block: remove ->bd_contains Now that each hd_struct has a reference to the corresponding block_device, there is no need for the bd_contains pointer. Add a bdev_whole() helper to look up the whole device block_device struture instead. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 +- drivers/scsi/scsicam.c | 2 +- fs/block_dev.c | 22 ++++++++-------------- include/linux/blk_types.h | 4 +++- 4 files changed, 13 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 26c7aafba7c5..c0df88b3300c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1088,7 +1088,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * here to avoid changing device under exclusive owner. */ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bdev->bd_contains; + claimed_bdev = bdev_whole(bdev); error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure); if (error) goto out_putf; diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c index 682cf08ab041..f1553a453616 100644 --- a/drivers/scsi/scsicam.c +++ b/drivers/scsi/scsicam.c @@ -32,7 +32,7 @@ */ unsigned char *scsi_bios_ptable(struct block_device *dev) { - struct address_space *mapping = dev->bd_contains->bd_inode->i_mapping; + struct address_space *mapping = bdev_whole(dev)->bd_inode->i_mapping; unsigned char *res = NULL; struct page *page; diff --git a/fs/block_dev.c b/fs/block_dev.c index b350ed3af83b..94baee369d26 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -119,7 +119,7 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, * under live filesystem. */ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bdev->bd_contains; + claimed_bdev = bdev_whole(bdev); err = bd_prepare_to_claim(bdev, claimed_bdev, truncate_bdev_range); if (err) @@ -880,7 +880,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) spin_lock_init(&bdev->bd_size_lock); bdev->bd_disk = disk; bdev->bd_partno = partno; - bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_part_count = 0; @@ -1347,9 +1346,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) int ret; if (!bdev->bd_openers) { - bdev->bd_contains = bdev; - - if (!bdev->bd_partno) { + if (!bdev_is_partition(bdev)) { ret = -ENXIO; bdev->bd_part = disk_get_part(disk, 0); if (!bdev->bd_part) @@ -1389,7 +1386,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) whole->bd_part_count++; mutex_unlock(&whole->bd_mutex); - bdev->bd_contains = whole; bdev->bd_part = disk_get_part(disk, bdev->bd_partno); if (!(disk->flags & GENHD_FL_UP) || !bdev->bd_part || !bdev->bd_part->nr_sects) { @@ -1405,7 +1401,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) if (bdev->bd_bdi == &noop_backing_dev_info) bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); } else { - if (bdev->bd_contains == bdev) { + if (!bdev_is_partition(bdev)) { ret = 0; if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); @@ -1423,7 +1419,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) out_clear: disk_put_part(bdev->bd_part); bdev->bd_part = NULL; - bdev->bd_contains = NULL; return ret; } @@ -1670,8 +1665,7 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) disk_put_part(bdev->bd_part); bdev->bd_part = NULL; if (bdev_is_partition(bdev)) - victim = bdev->bd_contains; - bdev->bd_contains = NULL; + victim = bdev_whole(bdev); } else { if (!bdev_is_partition(bdev) && disk->fops->release) disk->fops->release(disk, mode); @@ -1690,6 +1684,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) mutex_lock(&bdev->bd_mutex); if (mode & FMODE_EXCL) { + struct block_device *whole = bdev_whole(bdev); bool bdev_free; /* @@ -1700,13 +1695,12 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) spin_lock(&bdev_lock); WARN_ON_ONCE(--bdev->bd_holders < 0); - WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0); + WARN_ON_ONCE(--whole->bd_holders < 0); - /* bd_contains might point to self, check in a separate step */ if ((bdev_free = !bdev->bd_holders)) bdev->bd_holder = NULL; - if (!bdev->bd_contains->bd_holders) - bdev->bd_contains->bd_holder = NULL; + if (!whole->bd_holders) + whole->bd_holder = NULL; spin_unlock(&bdev_lock); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9698f459cc65..2e0a9bd9688d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -32,7 +32,6 @@ struct block_device { #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif - struct block_device * bd_contains; u8 bd_partno; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ @@ -49,6 +48,9 @@ struct block_device { struct super_block *bd_fsfreeze_sb; } __randomize_layout; +#define bdev_whole(_bdev) \ + ((_bdev)->bd_disk->part0.bdev) + #define bdev_kobj(_bdev) \ (&part_to_dev((_bdev)->bd_part)->kobj) -- cgit v1.2.3 From 37c3fc9abb25cd767ad5b048358336ac89488c16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Nov 2020 21:20:08 +0100 Subject: block: simplify the block device claiming interface Stop passing the whole device as a separate argument given that it can be trivially deducted and cleanup the !holder debug check. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/loop.c | 12 +++++------- fs/block_dev.c | 51 ++++++++++++++++++-------------------------------- include/linux/blkdev.h | 6 ++---- 3 files changed, 25 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c0df88b3300c..d643c67be6ac 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1069,7 +1069,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct file *file; struct inode *inode; struct address_space *mapping; - struct block_device *claimed_bdev = NULL; int error; loff_t size; bool partscan; @@ -1088,8 +1087,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * here to avoid changing device under exclusive owner. */ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bdev_whole(bdev); - error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure); + error = bd_prepare_to_claim(bdev, loop_configure); if (error) goto out_putf; } @@ -1176,15 +1174,15 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, mutex_unlock(&loop_ctl_mutex); if (partscan) loop_reread_partitions(lo, bdev); - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_configure); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, loop_configure); return 0; out_unlock: mutex_unlock(&loop_ctl_mutex); out_bdev: - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_configure); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); out: diff --git a/fs/block_dev.c b/fs/block_dev.c index 94baee369d26..0569f5ebeb6f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -110,24 +110,20 @@ EXPORT_SYMBOL(invalidate_bdev); int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, loff_t lend) { - struct block_device *claimed_bdev = NULL; - int err; - /* * If we don't hold exclusive handle for the device, upgrade to it * while we discard the buffer cache to avoid discarding buffers * under live filesystem. */ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bdev_whole(bdev); - err = bd_prepare_to_claim(bdev, claimed_bdev, - truncate_bdev_range); + int err = bd_prepare_to_claim(bdev, truncate_bdev_range); if (err) return err; } + truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend); - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, truncate_bdev_range); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, truncate_bdev_range); return 0; } EXPORT_SYMBOL(truncate_bdev_range); @@ -978,7 +974,6 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, /** * bd_prepare_to_claim - claim a block device * @bdev: block device of interest - * @whole: the whole device containing @bdev, may equal @bdev * @holder: holder trying to claim @bdev * * Claim @bdev. This function fails if @bdev is already claimed by another @@ -988,9 +983,12 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, * RETURNS: * 0 if @bdev can be claimed, -EBUSY otherwise. */ -int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, - void *holder) +int bd_prepare_to_claim(struct block_device *bdev, void *holder) { + struct block_device *whole = bdev_whole(bdev); + + if (WARN_ON_ONCE(!holder)) + return -EINVAL; retry: spin_lock(&bdev_lock); /* if someone else claimed, fail */ @@ -1030,15 +1028,15 @@ static void bd_clear_claiming(struct block_device *whole, void *holder) /** * bd_finish_claiming - finish claiming of a block device * @bdev: block device of interest - * @whole: whole block device * @holder: holder that has claimed @bdev * * Finish exclusive open of a block device. Mark the device as exlusively * open by the holder and wake up all waiters for exclusive open to finish. */ -static void bd_finish_claiming(struct block_device *bdev, - struct block_device *whole, void *holder) +static void bd_finish_claiming(struct block_device *bdev, void *holder) { + struct block_device *whole = bdev_whole(bdev); + spin_lock(&bdev_lock); BUG_ON(!bd_may_claim(bdev, whole, holder)); /* @@ -1063,11 +1061,10 @@ static void bd_finish_claiming(struct block_device *bdev, * also used when exclusive open is not actually desired and we just needed * to block other exclusive openers for a while. */ -void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, - void *holder) +void bd_abort_claiming(struct block_device *bdev, void *holder) { spin_lock(&bdev_lock); - bd_clear_claiming(whole, holder); + bd_clear_claiming(bdev_whole(bdev), holder); spin_unlock(&bdev_lock); } EXPORT_SYMBOL(bd_abort_claiming); @@ -1487,7 +1484,6 @@ void blkdev_put_no_open(struct block_device *bdev) */ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) { - struct block_device *claiming; bool unblock_events = true; struct block_device *bdev; struct gendisk *disk; @@ -1510,15 +1506,9 @@ retry: disk = bdev->bd_disk; if (mode & FMODE_EXCL) { - WARN_ON_ONCE(!holder); - - ret = -ENOMEM; - claiming = bdget_disk(disk, 0); - if (!claiming) - goto put_blkdev; - ret = bd_prepare_to_claim(bdev, claiming, holder); + ret = bd_prepare_to_claim(bdev, holder); if (ret) - goto put_claiming; + goto put_blkdev; } disk_block_events(disk); @@ -1528,7 +1518,7 @@ retry: if (ret) goto abort_claiming; if (mode & FMODE_EXCL) { - bd_finish_claiming(bdev, claiming, holder); + bd_finish_claiming(bdev, holder); /* * Block event polling for write claims if requested. Any write @@ -1547,18 +1537,13 @@ retry: if (unblock_events) disk_unblock_events(disk); - if (mode & FMODE_EXCL) - bdput(claiming); return bdev; abort_claiming: if (mode & FMODE_EXCL) - bd_abort_claiming(bdev, claiming, holder); + bd_abort_claiming(bdev, holder); mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); -put_claiming: - if (mode & FMODE_EXCL) - bdput(claiming); put_blkdev: blkdev_put_no_open(bdev); if (ret == -ERESTARTSYS) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5d48b92f5e43..43a25d855e04 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1988,10 +1988,8 @@ void blkdev_show(struct seq_file *seqf, off_t offset); struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); -int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, - void *holder); -void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, - void *holder); +int bd_prepare_to_claim(struct block_device *bdev, void *holder); +void bd_abort_claiming(struct block_device *bdev, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); /* just for blk-cgroup, don't use elsewhere */ -- cgit v1.2.3 From c64dc3bd87097e7f08b9437819440f8bfddef995 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 16:38:15 +0100 Subject: block: simplify part_to_disk Now that struct hd_struct has a block_device pointer use that to find the disk. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 42a51653c730..6ba91ee54cb2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -218,13 +218,9 @@ struct gendisk { static inline struct gendisk *part_to_disk(struct hd_struct *part) { - if (likely(part)) { - if (part->partno) - return dev_to_disk(part_to_dev(part)->parent); - else - return dev_to_disk(part_to_dev(part)); - } - return NULL; + if (unlikely(!part)) + return NULL; + return part->bdev->bd_disk; } static inline int disk_max_parts(struct gendisk *disk) -- cgit v1.2.3 From a782483cc1f875355690625d8253a232f2581418 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 18:43:37 +0100 Subject: block: remove the nr_sects field in struct hd_struct Now that the hd_struct always has a block device attached to it, there is no need for having two size field that just get out of sync. Additionally the field in hd_struct did not use proper serialization, possibly allowing for torn writes. By only using the block_device field this problem also gets fixed. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Coly Li [bcache] Acked-by: Chao Yu [f2fs] Signed-off-by: Jens Axboe --- block/bio.c | 4 +-- block/blk-core.c | 2 +- block/blk.h | 53 --------------------------- block/genhd.c | 59 +++++++++++++++++------------- block/partitions/core.c | 17 +++++---- drivers/block/loop.c | 1 - drivers/block/nbd.c | 2 +- drivers/block/xen-blkback/common.h | 4 +-- drivers/md/bcache/super.c | 2 +- drivers/s390/block/dasd_ioctl.c | 4 +-- drivers/target/target_core_pscsi.c | 5 ++- fs/block_dev.c | 73 ++------------------------------------ fs/f2fs/super.c | 2 +- fs/pstore/blk.c | 2 +- include/linux/genhd.h | 29 ++++----------- kernel/trace/blktrace.c | 2 +- 16 files changed, 67 insertions(+), 194 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index fa01bef35bb1..669bb47a3198 100644 --- a/block/bio.c +++ b/block/bio.c @@ -613,8 +613,8 @@ void guard_bio_eod(struct bio *bio) rcu_read_lock(); part = __disk_get_part(bio->bi_disk, bio->bi_partno); if (part) - maxsector = part_nr_sects_read(part); - else + maxsector = bdev_nr_sectors(part->bdev); + else maxsector = get_capacity(bio->bi_disk); rcu_read_unlock(); diff --git a/block/blk-core.c b/block/blk-core.c index 2db8bda43b6e..988f45094a38 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -755,7 +755,7 @@ static inline int blk_partition_remap(struct bio *bio) goto out; if (bio_sectors(bio)) { - if (bio_check_eod(bio, part_nr_sects_read(p))) + if (bio_check_eod(bio, bdev_nr_sectors(p->bdev))) goto out; bio->bi_iter.bi_sector += p->start_sect; trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), diff --git a/block/blk.h b/block/blk.h index c4839abcfa27..09cee7024fb4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -387,59 +387,6 @@ static inline void hd_free_part(struct hd_struct *part) percpu_ref_exit(&part->ref); } -/* - * Any access of part->nr_sects which is not protected by partition - * bd_mutex or gendisk bdev bd_mutex, should be done using this - * accessor function. - * - * Code written along the lines of i_size_read() and i_size_write(). - * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption - * on. - */ -static inline sector_t part_nr_sects_read(struct hd_struct *part) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - sector_t nr_sects; - unsigned seq; - do { - seq = read_seqcount_begin(&part->nr_sects_seq); - nr_sects = part->nr_sects; - } while (read_seqcount_retry(&part->nr_sects_seq, seq)); - return nr_sects; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - sector_t nr_sects; - - preempt_disable(); - nr_sects = part->nr_sects; - preempt_enable(); - return nr_sects; -#else - return part->nr_sects; -#endif -} - -/* - * Should be called with mutex lock held (typically bd_mutex) of partition - * to provide mutual exlusion among writers otherwise seqcount might be - * left in wrong state leaving the readers spinning infinitely. - */ -static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - preempt_disable(); - write_seqcount_begin(&part->nr_sects_seq); - part->nr_sects = size; - write_seqcount_end(&part->nr_sects_seq); - preempt_enable(); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - preempt_disable(); - part->nr_sects = size; - preempt_enable(); -#else - part->nr_sects = size; -#endif -} - int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, unsigned int max_sectors, bool *same_page); diff --git a/block/genhd.c b/block/genhd.c index bf8fa82f135f..c65f485b9db5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -40,6 +40,16 @@ static void disk_add_events(struct gendisk *disk); static void disk_del_events(struct gendisk *disk); static void disk_release_events(struct gendisk *disk); +void set_capacity(struct gendisk *disk, sector_t sectors) +{ + struct block_device *bdev = disk->part0.bdev; + + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + spin_unlock(&bdev->bd_size_lock); +} +EXPORT_SYMBOL(set_capacity); + /* * Set disk capacity and notify if the size is not currently zero and will not * be set to zero. Returns true if a uevent was sent, otherwise false. @@ -47,18 +57,30 @@ static void disk_release_events(struct gendisk *disk); bool set_capacity_and_notify(struct gendisk *disk, sector_t size) { sector_t capacity = get_capacity(disk); + char *envp[] = { "RESIZE=1", NULL }; set_capacity(disk, size); - revalidate_disk_size(disk, true); - if (capacity != size && capacity != 0 && size != 0) { - char *envp[] = { "RESIZE=1", NULL }; + /* + * Only print a message and send a uevent if the gendisk is user visible + * and alive. This avoids spamming the log and udev when setting the + * initial capacity during probing. + */ + if (size == capacity || + (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP) + return false; - kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); - return true; - } + pr_info("%s: detected capacity change from %lld to %lld\n", + disk->disk_name, size, capacity); - return false; + /* + * Historically we did not send a uevent for changes to/from an empty + * device. + */ + if (!capacity || !size) + return false; + kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); + return true; } EXPORT_SYMBOL_GPL(set_capacity_and_notify); @@ -247,7 +269,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; - if (!part_nr_sects_read(part) && + if (!bdev_nr_sectors(part->bdev) && !(piter->flags & DISK_PITER_INCL_EMPTY) && !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && piter->idx == 0)) @@ -284,7 +306,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); static inline int sector_in_part(struct hd_struct *part, sector_t sector) { return part->start_sect <= sector && - sector < part->start_sect + part_nr_sects_read(part); + sector < part->start_sect + bdev_nr_sectors(part->bdev); } /** @@ -986,8 +1008,8 @@ void __init printk_all_partitions(void) printk("%s%s %10llu %s %s", is_part0 ? "" : " ", bdevt_str(part_devt(part), devt_buf), - (unsigned long long)part_nr_sects_read(part) >> 1 - , disk_name(disk, part->partno, name_buf), + bdev_nr_sectors(part->bdev) >> 1, + disk_name(disk, part->partno, name_buf), part->info ? part->info->uuid : ""); if (is_part0) { if (dev->parent && dev->parent->driver) @@ -1079,7 +1101,7 @@ static int show_partition(struct seq_file *seqf, void *v) while ((part = disk_part_iter_next(&piter))) seq_printf(seqf, "%4d %7d %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), - (unsigned long long)part_nr_sects_read(part) >> 1, + bdev_nr_sectors(part->bdev) >> 1, disk_name(sgp, part->partno, buf)); disk_part_iter_exit(&piter); @@ -1161,8 +1183,7 @@ ssize_t part_size_show(struct device *dev, { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%llu\n", - (unsigned long long)part_nr_sects_read(p)); + return sprintf(buf, "%llu\n", bdev_nr_sectors(p->bdev)); } ssize_t part_stat_show(struct device *dev, @@ -1618,16 +1639,6 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) ptbl = rcu_dereference_protected(disk->part_tbl, 1); rcu_assign_pointer(ptbl->part[0], &disk->part0); - /* - * set_capacity() and get_capacity() currently don't use - * seqcounter to read/update the part0->nr_sects. Still init - * the counter as we can read the sectors in IO submission - * patch using seqence counters. - * - * TODO: Ideally set_capacity() and get_capacity() should be - * converted to make use of bd_mutex and sequence counters. - */ - hd_sects_seq_init(&disk->part0); if (hd_ref_init(&disk->part0)) goto out_free_bdstats; diff --git a/block/partitions/core.c b/block/partitions/core.c index 696bd9ff63c6..bcfa8215bd5e 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -85,6 +85,13 @@ static int (*check_part[])(struct parsed_partitions *) = { NULL }; +static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) +{ + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + spin_unlock(&bdev->bd_size_lock); +} + static struct parsed_partitions *allocate_partitions(struct gendisk *hd) { struct parsed_partitions *state; @@ -295,7 +302,7 @@ static void hd_struct_free_work(struct work_struct *work) put_device(disk_to_dev(disk)); part->start_sect = 0; - part->nr_sects = 0; + bdev_set_nr_sectors(part->bdev, 0); part_stat_set_all(part, 0); put_device(part_to_dev(part)); } @@ -412,11 +419,10 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_free_stats; p->bdev = bdev; - hd_sects_seq_init(p); pdev = part_to_dev(p); p->start_sect = start; - p->nr_sects = len; + bdev_set_nr_sectors(bdev, len); p->partno = partno; p->policy = get_disk_ro(disk); @@ -509,7 +515,7 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start, disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) { if (part->partno == skip_partno || - start >= part->start_sect + part->nr_sects || + start >= part->start_sect + bdev_nr_sectors(part->bdev) || start + length <= part->start_sect) continue; overlap = true; @@ -600,8 +606,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno, if (partition_overlaps(bdev->bd_disk, start, length, partno)) goto out_unlock; - part_nr_sects_write(part, length); - bd_set_nr_sectors(bdevp, length); + bdev_set_nr_sectors(bdevp, length); ret = 0; out_unlock: diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d643c67be6ac..d2ce1ddc192d 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1241,7 +1241,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) set_capacity(lo->lo_disk, 0); loop_sysfs_exit(lo); if (bdev) { - bd_set_nr_sectors(bdev, 0); /* let user-space know about this change */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 45b0423ef2c5..014683968ce1 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1132,7 +1132,7 @@ static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) return; - bd_set_nr_sectors(bdev, 0); + set_capacity(bdev->bd_disk, 0); } static void nbd_parse_flags(struct nbd_device *nbd) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index c6ea5d38c509..0762db247b41 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -358,9 +358,7 @@ struct pending_req { }; -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : \ - get_capacity((_v)->bdev->bd_disk)) +#define vbd_sz(_v) bdev_nr_sectors((_v)->bdev) #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define xen_blkif_put(_b) \ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index c55d3c58a7ef..04fa40868fbe 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1408,7 +1408,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) q->limits.raid_partial_stripes_expensive; ret = bcache_device_init(&dc->disk, block_size, - dc->bdev->bd_part->nr_sects - dc->sb.data_offset, + bdev_nr_sectors(dc->bdev) - dc->sb.data_offset, dc->bdev, &bcache_cached_ops); if (ret) return ret; diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 3359559517bf..304eba1acf16 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -54,8 +54,6 @@ dasd_ioctl_enable(struct block_device *bdev) return -ENODEV; dasd_enable_device(base); - /* Formatting the dasd device can change the capacity. */ - bd_set_nr_sectors(bdev, get_capacity(base->block->gdp)); dasd_put_device(base); return 0; } @@ -88,7 +86,7 @@ dasd_ioctl_disable(struct block_device *bdev) * Set i_size to zero, since read, write, etc. check against this * value. */ - bd_set_nr_sectors(bdev, 0); + set_capacity(bdev->bd_disk, 0); dasd_put_device(base); return 0; } diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 4e37fa9b409d..7994f27e4527 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1029,9 +1029,8 @@ static sector_t pscsi_get_blocks(struct se_device *dev) { struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); - if (pdv->pdv_bd && pdv->pdv_bd->bd_part) - return pdv->pdv_bd->bd_part->nr_sects; - + if (pdv->pdv_bd) + return bdev_nr_sectors(pdv->pdv_bd); return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index a5b6955a841f..31ee5a857f71 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1208,70 +1208,6 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); #endif -/** - * check_disk_size_change - checks for disk size change and adjusts bdev size. - * @disk: struct gendisk to check - * @bdev: struct bdev to adjust. - * @verbose: if %true log a message about a size change if there is any - * - * This routine checks to see if the bdev size does not match the disk size - * and adjusts it if it differs. When shrinking the bdev size, its all caches - * are freed. - */ -static void check_disk_size_change(struct gendisk *disk, - struct block_device *bdev, bool verbose) -{ - loff_t disk_size, bdev_size; - - spin_lock(&bdev->bd_size_lock); - disk_size = (loff_t)get_capacity(disk) << 9; - bdev_size = i_size_read(bdev->bd_inode); - if (disk_size != bdev_size) { - if (verbose) { - printk(KERN_INFO - "%s: detected capacity change from %lld to %lld\n", - disk->disk_name, bdev_size, disk_size); - } - i_size_write(bdev->bd_inode, disk_size); - } - spin_unlock(&bdev->bd_size_lock); -} - -/** - * revalidate_disk_size - checks for disk size change and adjusts bdev size. - * @disk: struct gendisk to check - * @verbose: if %true log a message about a size change if there is any - * - * This routine checks to see if the bdev size does not match the disk size - * and adjusts it if it differs. When shrinking the bdev size, its all caches - * are freed. - */ -void revalidate_disk_size(struct gendisk *disk, bool verbose) -{ - struct block_device *bdev; - - /* - * Hidden disks don't have associated bdev so there's no point in - * revalidating them. - */ - if (disk->flags & GENHD_FL_HIDDEN) - return; - - bdev = bdget_disk(disk, 0); - if (bdev) { - check_disk_size_change(disk, bdev, verbose); - bdput(bdev); - } -} - -void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) -{ - spin_lock(&bdev->bd_size_lock); - i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock(&bdev->bd_size_lock); -} -EXPORT_SYMBOL(bd_set_nr_sectors); - static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); int bdev_disk_changed(struct block_device *bdev, bool invalidate) @@ -1305,8 +1241,6 @@ rescan: disk->fops->revalidate_disk(disk); } - check_disk_size_change(disk, bdev, !invalidate); - if (get_capacity(disk)) { ret = blk_add_partitions(disk, bdev); if (ret == -EAGAIN) @@ -1349,10 +1283,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) if (disk->fops->open) ret = disk->fops->open(bdev, mode); - if (!ret) { - bd_set_nr_sectors(bdev, get_capacity(disk)); + if (!ret) set_init_blocksize(bdev); - } /* * If the device is invalidated, rescan partition @@ -1381,13 +1313,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) bdev->bd_part = disk_get_part(disk, bdev->bd_partno); if (!(disk->flags & GENHD_FL_UP) || - !bdev->bd_part || !bdev->bd_part->nr_sects) { + !bdev->bd_part || !bdev_nr_sectors(bdev)) { __blkdev_put(whole, mode, 1); bdput(whole); ret = -ENXIO; goto out_clear; } - bd_set_nr_sectors(bdev, bdev->bd_part->nr_sects); set_init_blocksize(bdev); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 00eff2f51807..d4e7fab352ba 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3151,7 +3151,7 @@ static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx, static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) { struct block_device *bdev = FDEV(devi).bdev; - sector_t nr_sectors = bdev->bd_part->nr_sects; + sector_t nr_sectors = bdev_nr_sectors(bdev); struct f2fs_report_zones_args rep_zone_arg; int ret; diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index fcd5563dde06..777a26f7bbe2 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -245,7 +245,7 @@ static struct block_device *psblk_get_bdev(void *holder, return bdev; } - nr_sects = part_nr_sects_read(bdev->bd_part); + nr_sects = bdev_nr_sectors(bdev); if (!nr_sects) { pr_err("not enough space for '%s'\n", blkdev); blkdev_put(bdev, mode); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6ba91ee54cb2..30d4785b7df8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -52,15 +52,6 @@ struct partition_meta_info { struct hd_struct { sector_t start_sect; - /* - * nr_sects is protected by sequence counter. One might extend a - * partition while IO is happening to it and update of nr_sects - * can be non-atomic on 32bit machines with 64bit sector_t. - */ - sector_t nr_sects; -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - seqcount_t nr_sects_seq; -#endif unsigned long stamp; struct disk_stats __percpu *dkstats; struct percpu_ref ref; @@ -254,13 +245,6 @@ static inline void disk_put_part(struct hd_struct *part) put_device(part_to_dev(part)); } -static inline void hd_sects_seq_init(struct hd_struct *p) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - seqcount_init(&p->nr_sects_seq); -#endif -} - /* * Smarter partition iterator without context limits. */ @@ -318,13 +302,15 @@ static inline sector_t get_start_sect(struct block_device *bdev) { return bdev->bd_part->start_sect; } -static inline sector_t get_capacity(struct gendisk *disk) + +static inline sector_t bdev_nr_sectors(struct block_device *bdev) { - return disk->part0.nr_sects; + return i_size_read(bdev->bd_inode) >> 9; } -static inline void set_capacity(struct gendisk *disk, sector_t size) + +static inline sector_t get_capacity(struct gendisk *disk) { - disk->part0.nr_sects = size; + return bdev_nr_sectors(disk->part0.bdev); } int bdev_disk_changed(struct block_device *bdev, bool invalidate); @@ -358,10 +344,9 @@ int __register_blkdev(unsigned int major, const char *name, __register_blkdev(major, name, NULL) void unregister_blkdev(unsigned int major, const char *name); -void revalidate_disk_size(struct gendisk *disk, bool verbose); bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); +void set_capacity(struct gendisk *disk, sector_t size); /* for drivers/char/raw.c: */ int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index f1022945e346..7076d588a50d 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -465,7 +465,7 @@ static void blk_trace_setup_lba(struct blk_trace *bt, if (part) { bt->start_lba = part->start_sect; - bt->end_lba = part->start_sect + part->nr_sects; + bt->end_lba = part->start_sect + bdev_nr_sectors(bdev); } else { bt->start_lba = 0; bt->end_lba = -1ULL; -- cgit v1.2.3 From 15e3d2c5cd53298272e59ad9072d3468f9dd3781 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:34:00 +0100 Subject: block: move disk stat accounting to struct block_device Move the dkstats and stamp field to struct block_device in preparation of killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- block/blk-core.c | 4 ++-- block/blk.h | 1 - block/genhd.c | 14 ++++---------- block/partitions/core.c | 9 +-------- fs/block_dev.c | 10 ++++++++++ include/linux/blk_types.h | 2 ++ include/linux/genhd.h | 2 -- include/linux/part_stat.h | 38 +++++++++++++++++++------------------- 9 files changed, 39 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index ad02289a4f7f..79aa96240cec 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -830,7 +830,7 @@ static void blkcg_fill_root_iostats(void) for_each_possible_cpu(cpu) { struct disk_stats *cpu_dkstats; - cpu_dkstats = per_cpu_ptr(part->dkstats, cpu); + cpu_dkstats = per_cpu_ptr(part->bdev->bd_stats, cpu); tmp.ios[BLKG_IOSTAT_READ] += cpu_dkstats->ios[STAT_READ]; tmp.ios[BLKG_IOSTAT_WRITE] += diff --git a/block/blk-core.c b/block/blk-core.c index 988f45094a38..d2c9cb24e087 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1264,9 +1264,9 @@ static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) { unsigned long stamp; again: - stamp = READ_ONCE(part->stamp); + stamp = READ_ONCE(part->bdev->bd_stamp); if (unlikely(stamp != now)) { - if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) + if (likely(cmpxchg(&part->bdev->bd_stamp, stamp, now) == stamp)) __part_stat_add(part, io_ticks, end ? now - stamp : 1); } if (part->partno) { diff --git a/block/blk.h b/block/blk.h index 09cee7024fb4..3f801f6e86f8 100644 --- a/block/blk.h +++ b/block/blk.h @@ -381,7 +381,6 @@ static inline void hd_struct_put(struct hd_struct *part) static inline void hd_free_part(struct hd_struct *part) { - free_percpu(part->dkstats); kfree(part->info); bdput(part->bdev); percpu_ref_exit(&part->ref); diff --git a/block/genhd.c b/block/genhd.c index c65f485b9db5..2cbda8139556 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -112,7 +112,7 @@ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) memset(stat, 0, sizeof(struct disk_stats)); for_each_possible_cpu(cpu) { - struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu); + struct disk_stats *ptr = per_cpu_ptr(part->bdev->bd_stats, cpu); int group; for (group = 0; group < NR_STAT_GROUPS; group++) { @@ -891,7 +891,7 @@ void del_gendisk(struct gendisk *disk) kobject_put(disk->slave_dir); part_stat_set_all(&disk->part0, 0); - disk->part0.stamp = 0; + disk->part0.bdev->bd_stamp = 0; if (!sysfs_deprecated) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); @@ -1628,19 +1628,15 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) if (!disk->part0.bdev) goto out_free_disk; - disk->part0.dkstats = alloc_percpu(struct disk_stats); - if (!disk->part0.dkstats) - goto out_bdput; - disk->node_id = node_id; if (disk_expand_part_tbl(disk, 0)) - goto out_free_bdstats; + goto out_bdput; ptbl = rcu_dereference_protected(disk->part_tbl, 1); rcu_assign_pointer(ptbl->part[0], &disk->part0); if (hd_ref_init(&disk->part0)) - goto out_free_bdstats; + goto out_bdput; disk->minors = minors; rand_initialize_disk(disk); @@ -1649,8 +1645,6 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) device_initialize(disk_to_dev(disk)); return disk; -out_free_bdstats: - free_percpu(disk->part0.dkstats); out_bdput: bdput(disk->part0.bdev); out_free_disk: diff --git a/block/partitions/core.c b/block/partitions/core.c index bcfa8215bd5e..8924e1ea8b2a 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -409,14 +409,9 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, if (!p) return ERR_PTR(-EBUSY); - err = -ENOMEM; - p->dkstats = alloc_percpu(struct disk_stats); - if (!p->dkstats) - goto out_free; - bdev = bdev_alloc(disk, partno); if (!bdev) - goto out_free_stats; + goto out_free; p->bdev = bdev; pdev = part_to_dev(p); @@ -490,8 +485,6 @@ out_free_info: kfree(p->info); out_bdput: bdput(bdev); -out_free_stats: - free_percpu(p->dkstats); out_free: kfree(p); return ERR_PTR(err); diff --git a/fs/block_dev.c b/fs/block_dev.c index 31ee5a857f71..0832c7830f3a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -781,6 +782,10 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) static void bdev_free_inode(struct inode *inode) { + struct block_device *bdev = I_BDEV(inode); + + free_percpu(bdev->bd_stats); + kmem_cache_free(bdev_cachep, BDEV_I(inode)); } @@ -875,6 +880,11 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); #endif + bdev->bd_stats = alloc_percpu(struct disk_stats); + if (!bdev->bd_stats) { + iput(inode); + return NULL; + } return bdev; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 2e0a9bd9688d..520011b95276 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,6 +20,8 @@ typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { + struct disk_stats __percpu *bd_stats; + unsigned long bd_stamp; dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 30d4785b7df8..804ac45fbfbc 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -52,8 +52,6 @@ struct partition_meta_info { struct hd_struct { sector_t start_sect; - unsigned long stamp; - struct disk_stats __percpu *dkstats; struct percpu_ref ref; struct block_device *bdev; diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 24125778ef3e..87ad60106e1d 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -25,17 +25,17 @@ struct disk_stats { #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field) + (per_cpu_ptr((part)->bdev->bd_stats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ - typeof((part)->dkstats->field) res = 0; \ + typeof((part)->bdev->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ + res += per_cpu_ptr((part)->bdev->bd_stats, _cpu)->field; \ res; \ }) @@ -44,7 +44,7 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) int i; for_each_possible_cpu(i) - memset(per_cpu_ptr(part->dkstats, i), value, + memset(per_cpu_ptr(part->bdev->bd_stats, i), value, sizeof(struct disk_stats)); } @@ -54,7 +54,7 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ - __this_cpu_add((part)->dkstats->field, addnd) + __this_cpu_add((part)->bdev->bd_stats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ @@ -63,20 +63,20 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) field, addnd); \ } while (0) -#define part_stat_dec(gendiskp, field) \ - part_stat_add(gendiskp, field, -1) -#define part_stat_inc(gendiskp, field) \ - part_stat_add(gendiskp, field, 1) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) +#define part_stat_dec(part, field) \ + part_stat_add(part, field, -1) +#define part_stat_inc(part, field) \ + part_stat_add(part, field, 1) +#define part_stat_sub(part, field, subnd) \ + part_stat_add(part, field, -subnd) -#define part_stat_local_dec(gendiskp, field) \ - local_dec(&(part_stat_get(gendiskp, field))) -#define part_stat_local_inc(gendiskp, field) \ - local_inc(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read(gendiskp, field) \ - local_read(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read_cpu(gendiskp, field, cpu) \ - local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) +#define part_stat_local_dec(part, field) \ + local_dec(&(part_stat_get(part, field))) +#define part_stat_local_inc(part, field) \ + local_inc(&(part_stat_get(part, field))) +#define part_stat_local_read(part, field) \ + local_read(&(part_stat_get(part, field))) +#define part_stat_local_read_cpu(part, field, cpu) \ + local_read(&(part_stat_get_cpu(part, field, cpu))) #endif /* _LINUX_PART_STAT_H */ -- cgit v1.2.3 From 29ff57c61094e7bbd921ab10b5a99dce9a0132e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:34:24 +0100 Subject: block: move the start_sect field to struct block_device Move the start_sect field to struct block_device in preparation of killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++-- block/blk-lib.c | 2 +- block/genhd.c | 4 ++-- block/partitions/core.c | 17 +++++++++-------- include/linux/blk_types.h | 1 + include/linux/blkdev.h | 4 ++-- include/linux/genhd.h | 3 +-- kernel/trace/blktrace.c | 11 +++-------- 8 files changed, 22 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index d2c9cb24e087..9a3793d5ce38 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -757,9 +757,10 @@ static inline int blk_partition_remap(struct bio *bio) if (bio_sectors(bio)) { if (bio_check_eod(bio, bdev_nr_sectors(p->bdev))) goto out; - bio->bi_iter.bi_sector += p->start_sect; + bio->bi_iter.bi_sector += p->bdev->bd_start_sect; trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), - bio->bi_iter.bi_sector - p->start_sect); + bio->bi_iter.bi_sector - + p->bdev->bd_start_sect); } bio->bi_partno = 0; ret = 0; diff --git a/block/blk-lib.c b/block/blk-lib.c index e90614fd8d6a..752f9c722062 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -65,7 +65,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, /* In case the discard request is in a partition */ if (bdev_is_partition(bdev)) - part_offset = bdev->bd_part->start_sect; + part_offset = bdev->bd_start_sect; while (nr_sects) { sector_t granularity_aligned_lba, req_sects; diff --git a/block/genhd.c b/block/genhd.c index 2cbda8139556..5efb2df1f079 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -305,8 +305,8 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); static inline int sector_in_part(struct hd_struct *part, sector_t sector) { - return part->start_sect <= sector && - sector < part->start_sect + bdev_nr_sectors(part->bdev); + return part->bdev->bd_start_sect <= sector && + sector < part->bdev->bd_start_sect + bdev_nr_sectors(part->bdev); } /** diff --git a/block/partitions/core.c b/block/partitions/core.c index 8924e1ea8b2a..460a745812c6 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -192,7 +192,7 @@ static ssize_t part_start_show(struct device *dev, { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); + return sprintf(buf, "%llu\n", p->bdev->bd_start_sect); } static ssize_t part_ro_show(struct device *dev, @@ -209,7 +209,7 @@ static ssize_t part_alignment_offset_show(struct device *dev, return sprintf(buf, "%u\n", queue_limit_alignment_offset(&part_to_disk(p)->queue->limits, - p->start_sect)); + p->bdev->bd_start_sect)); } static ssize_t part_discard_alignment_show(struct device *dev, @@ -219,7 +219,7 @@ static ssize_t part_discard_alignment_show(struct device *dev, return sprintf(buf, "%u\n", queue_limit_discard_alignment(&part_to_disk(p)->queue->limits, - p->start_sect)); + p->bdev->bd_start_sect)); } static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); @@ -301,7 +301,7 @@ static void hd_struct_free_work(struct work_struct *work) */ put_device(disk_to_dev(disk)); - part->start_sect = 0; + part->bdev->bd_start_sect = 0; bdev_set_nr_sectors(part->bdev, 0); part_stat_set_all(part, 0); put_device(part_to_dev(part)); @@ -416,7 +416,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev = part_to_dev(p); - p->start_sect = start; + bdev->bd_start_sect = start; bdev_set_nr_sectors(bdev, len); p->partno = partno; p->policy = get_disk_ro(disk); @@ -508,8 +508,9 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start, disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) { if (part->partno == skip_partno || - start >= part->start_sect + bdev_nr_sectors(part->bdev) || - start + length <= part->start_sect) + start >= part->bdev->bd_start_sect + + bdev_nr_sectors(part->bdev) || + start + length <= part->bdev->bd_start_sect) continue; overlap = true; break; @@ -592,7 +593,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno, mutex_lock_nested(&bdev->bd_mutex, 1); ret = -EINVAL; - if (start != part->start_sect) + if (start != part->bdev->bd_start_sect) goto out_unlock; ret = -EBUSY; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 520011b95276..a690008f60cd 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,6 +20,7 @@ typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { + sector_t bd_start_sect; struct disk_stats __percpu *bd_stats; unsigned long bd_stamp; dev_t bd_dev; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 43a25d855e04..619adea57098 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1488,7 +1488,7 @@ static inline int bdev_alignment_offset(struct block_device *bdev) return -1; if (bdev_is_partition(bdev)) return queue_limit_alignment_offset(&q->limits, - bdev->bd_part->start_sect); + bdev->bd_start_sect); return q->limits.alignment_offset; } @@ -1529,7 +1529,7 @@ static inline int bdev_discard_alignment(struct block_device *bdev) if (bdev_is_partition(bdev)) return queue_limit_discard_alignment(&q->limits, - bdev->bd_part->start_sect); + bdev->bd_start_sect); return q->limits.discard_alignment; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 804ac45fbfbc..50d27f5d38e2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -51,7 +51,6 @@ struct partition_meta_info { }; struct hd_struct { - sector_t start_sect; struct percpu_ref ref; struct block_device *bdev; @@ -298,7 +297,7 @@ extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) { - return bdev->bd_part->start_sect; + return bdev->bd_start_sect; } static inline sector_t bdev_nr_sectors(struct block_device *bdev) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7076d588a50d..8a723a91ec5a 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -458,14 +458,9 @@ static struct rchan_callbacks blk_relay_callbacks = { static void blk_trace_setup_lba(struct blk_trace *bt, struct block_device *bdev) { - struct hd_struct *part = NULL; - - if (bdev) - part = bdev->bd_part; - - if (part) { - bt->start_lba = part->start_sect; - bt->end_lba = part->start_sect + bdev_nr_sectors(bdev); + if (bdev) { + bt->start_lba = bdev->bd_start_sect; + bt->end_lba = bdev->bd_start_sect + bdev_nr_sectors(bdev); } else { bt->start_lba = 0; bt->end_lba = -1ULL; -- cgit v1.2.3 From 231926dbf0f084211e4ec4f4c006f0bf1f47809a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 12:01:45 +0100 Subject: block: move the partition_meta_info to struct block_device Move the partition_meta_info to struct block_device in preparation for killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk.h | 1 - block/genhd.c | 3 ++- block/partitions/core.c | 18 +++++++----------- fs/block_dev.c | 1 + include/linux/blk_types.h | 2 ++ include/linux/genhd.h | 1 - init/do_mounts.c | 7 ++++--- 7 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/block/blk.h b/block/blk.h index 3f801f6e86f8..0bd4b58bcbaf 100644 --- a/block/blk.h +++ b/block/blk.h @@ -381,7 +381,6 @@ static inline void hd_struct_put(struct hd_struct *part) static inline void hd_free_part(struct hd_struct *part) { - kfree(part->info); bdput(part->bdev); percpu_ref_exit(&part->ref); } diff --git a/block/genhd.c b/block/genhd.c index 5efb2df1f079..4273e89f07e8 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1010,7 +1010,8 @@ void __init printk_all_partitions(void) bdevt_str(part_devt(part), devt_buf), bdev_nr_sectors(part->bdev) >> 1, disk_name(disk, part->partno, name_buf), - part->info ? part->info->uuid : ""); + part->bdev->bd_meta_info ? + part->bdev->bd_meta_info->uuid : ""); if (is_part0) { if (dev->parent && dev->parent->driver) printk(" driver: %s\n", diff --git a/block/partitions/core.c b/block/partitions/core.c index 460a745812c6..07df9ff55462 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -275,8 +275,9 @@ static int part_uevent(struct device *dev, struct kobj_uevent_env *env) struct hd_struct *part = dev_to_part(dev); add_uevent_var(env, "PARTN=%u", part->partno); - if (part->info && part->info->volname[0]) - add_uevent_var(env, "PARTNAME=%s", part->info->volname); + if (part->bdev->bd_meta_info && part->bdev->bd_meta_info->volname[0]) + add_uevent_var(env, "PARTNAME=%s", + part->bdev->bd_meta_info->volname); return 0; } @@ -422,13 +423,10 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, p->policy = get_disk_ro(disk); if (info) { - struct partition_meta_info *pinfo; - - pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id); - if (!pinfo) + err = -ENOMEM; + bdev->bd_meta_info = kmemdup(info, sizeof(*info), GFP_KERNEL); + if (!bdev->bd_meta_info) goto out_bdput; - memcpy(pinfo, info, sizeof(*info)); - p->info = pinfo; } dname = dev_name(ddev); @@ -444,7 +442,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, err = blk_alloc_devt(p, &devt); if (err) - goto out_free_info; + goto out_bdput; pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ @@ -481,8 +479,6 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, kobject_uevent(&pdev->kobj, KOBJ_ADD); return p; -out_free_info: - kfree(p->info); out_bdput: bdput(bdev); out_free: diff --git a/fs/block_dev.c b/fs/block_dev.c index 0832c7830f3a..0770f654b09c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -785,6 +785,7 @@ static void bdev_free_inode(struct inode *inode) struct block_device *bdev = I_BDEV(inode); free_percpu(bdev->bd_stats); + kfree(bdev->bd_meta_info); kmem_cache_free(bdev_cachep, BDEV_I(inode)); } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a690008f60cd..2f8ede04e5a9 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -49,6 +49,8 @@ struct block_device { /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; struct super_block *bd_fsfreeze_sb; + + struct partition_meta_info *bd_meta_info; } __randomize_layout; #define bdev_whole(_bdev) \ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 50d27f5d38e2..30d7076155b4 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -57,7 +57,6 @@ struct hd_struct { struct device __dev; struct kobject *holder_dir; int policy, partno; - struct partition_meta_info *info; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif diff --git a/init/do_mounts.c b/init/do_mounts.c index 5879edf083b3..368ccb718501 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -79,8 +79,8 @@ static int match_dev_by_uuid(struct device *dev, const void *data) const struct uuidcmp *cmp = data; struct hd_struct *part = dev_to_part(dev); - if (!part->info || - strncasecmp(cmp->uuid, part->info->uuid, cmp->len)) + if (!part->bdev->bd_meta_info || + strncasecmp(cmp->uuid, part->bdev->bd_meta_info->uuid, cmp->len)) return 0; return 1; } @@ -169,7 +169,8 @@ static int match_dev_by_label(struct device *dev, const void *data) const char *label = data; struct hd_struct *part = dev_to_part(dev); - if (!part->info || strcmp(label, part->info->volname)) + if (!part->bdev->bd_meta_info || + strcmp(label, part->bdev->bd_meta_info->volname)) return 0; return 1; } -- cgit v1.2.3 From 1bdd5ae0251d678488dffcf455d4633c2beef1bc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 19:00:13 +0100 Subject: block: move holder_dir to struct block_device Move the holder_dir field to struct block_device in preparation for kill struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/genhd.c | 5 +++-- block/partitions/core.c | 8 ++++---- fs/block_dev.c | 11 +++++------ include/linux/blk_types.h | 1 + include/linux/genhd.h | 1 - 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 4273e89f07e8..0bd7026cee62 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -681,7 +681,8 @@ static void register_disk(struct device *parent, struct gendisk *disk, */ pm_runtime_set_memalloc_noio(ddev, true); - disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); + disk->part0.bdev->bd_holder_dir = + kobject_create_and_add("holders", &ddev->kobj); disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); if (disk->flags & GENHD_FL_HIDDEN) { @@ -887,7 +888,7 @@ void del_gendisk(struct gendisk *disk) blk_unregister_queue(disk); - kobject_put(disk->part0.holder_dir); + kobject_put(disk->part0.bdev->bd_holder_dir); kobject_put(disk->slave_dir); part_stat_set_all(&disk->part0, 0); diff --git a/block/partitions/core.c b/block/partitions/core.c index 07df9ff55462..c068471fa654 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -344,7 +344,7 @@ void delete_partition(struct hd_struct *part) */ get_device(disk_to_dev(disk)); rcu_assign_pointer(ptbl->part[part->partno], NULL); - kobject_put(part->holder_dir); + kobject_put(part->bdev->bd_holder_dir); device_del(part_to_dev(part)); /* @@ -452,8 +452,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_put; err = -ENOMEM; - p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); - if (!p->holder_dir) + bdev->bd_holder_dir = kobject_create_and_add("holders", &pdev->kobj); + if (!bdev->bd_holder_dir) goto out_del; dev_set_uevent_suppress(pdev, 0); @@ -487,7 +487,7 @@ out_free: out_remove_file: device_remove_file(pdev, &dev_attr_whole_disk); out_del: - kobject_put(p->holder_dir); + kobject_put(bdev->bd_holder_dir); device_del(pdev); out_put: put_device(pdev); diff --git a/fs/block_dev.c b/fs/block_dev.c index 0770f654b09c..381c22426f43 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1142,7 +1142,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) WARN_ON_ONCE(!bdev->bd_holder); /* FIXME: remove the following once add_disk() handles errors */ - if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) + if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir)) goto out_unlock; holder = bd_find_holder_disk(bdev, disk); @@ -1165,14 +1165,14 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) if (ret) goto out_free; - ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); + ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); if (ret) goto out_del; /* * bdev could be deleted beneath us which would implicitly destroy * the holder directory. Hold on to it. */ - kobject_get(bdev->bd_part->holder_dir); + kobject_get(bdev->bd_holder_dir); list_add(&holder->list, &bdev->bd_holder_disks); goto out_unlock; @@ -1207,9 +1207,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { del_symlink(disk->slave_dir, bdev_kobj(bdev)); - del_symlink(bdev->bd_part->holder_dir, - &disk_to_dev(disk)->kobj); - kobject_put(bdev->bd_part->holder_dir); + del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); + kobject_put(bdev->bd_holder_dir); list_del_init(&holder->list); kfree(holder); } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 2f8ede04e5a9..c0591e52d7d7 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -35,6 +35,7 @@ struct block_device { #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif + struct kobject *bd_holder_dir; u8 bd_partno; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 30d7076155b4..b4a5c05593b9 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -55,7 +55,6 @@ struct hd_struct { struct block_device *bdev; struct device __dev; - struct kobject *holder_dir; int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; -- cgit v1.2.3 From b309e9936347232c724eaa13f70533128b4864e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 16:28:47 +0100 Subject: block: move make_it_fail to struct block_device Move the make_it_fail flag to struct block_device an turn it into a bool in preparation of killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-core.c | 3 ++- block/genhd.c | 4 ++-- include/linux/blk_types.h | 3 +++ include/linux/genhd.h | 3 --- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 9a3793d5ce38..9121390be97a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -668,7 +668,8 @@ __setup("fail_make_request=", setup_fail_make_request); static bool should_fail_request(struct hd_struct *part, unsigned int bytes) { - return part->make_it_fail && should_fail(&fail_make_request, bytes); + return part->bdev->bd_make_it_fail && + should_fail(&fail_make_request, bytes); } static int __init fail_make_request_debugfs(void) diff --git a/block/genhd.c b/block/genhd.c index 0bd7026cee62..f9c957739d4b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1292,7 +1292,7 @@ ssize_t part_fail_show(struct device *dev, { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%d\n", p->make_it_fail); + return sprintf(buf, "%d\n", p->bdev->bd_make_it_fail); } ssize_t part_fail_store(struct device *dev, @@ -1303,7 +1303,7 @@ ssize_t part_fail_store(struct device *dev, int i; if (count > 0 && sscanf(buf, "%d", &i) > 0) - p->make_it_fail = (i == 0) ? 0 : 1; + p->bdev->bd_make_it_fail = i; return count; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c0591e52d7d7..b237f1e40814 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -52,6 +52,9 @@ struct block_device { struct super_block *bd_fsfreeze_sb; struct partition_meta_info *bd_meta_info; +#ifdef CONFIG_FAIL_MAKE_REQUEST + bool bd_make_it_fail; +#endif } __randomize_layout; #define bdev_whole(_bdev) \ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b4a5c05593b9..349cf6403ccd 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -56,9 +56,6 @@ struct hd_struct { struct block_device *bdev; struct device __dev; int policy, partno; -#ifdef CONFIG_FAIL_MAKE_REQUEST - int make_it_fail; -#endif struct rcu_work rcu_work; }; -- cgit v1.2.3 From 83950d359010a493462d58c712b1124c877d1b3b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 16:36:02 +0100 Subject: block: move the policy field to struct block_device Move the policy field to struct block_device and rename it to the more descriptive bd_read_only. Also turn the field into a bool as it is used as such. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/genhd.c | 8 ++++---- block/ioctl.c | 2 +- block/partitions/core.c | 4 ++-- include/linux/blk_types.h | 1 + include/linux/genhd.h | 4 ++-- 6 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 9121390be97a..d64ffcb6f9ae 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -696,7 +696,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) { const int op = bio_op(bio); - if (part->policy && op_is_write(op)) { + if (part->bdev->bd_read_only && op_is_write(op)) { char b[BDEVNAME_SIZE]; if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) diff --git a/block/genhd.c b/block/genhd.c index f9c957739d4b..2db1204920a9 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1687,14 +1687,14 @@ void set_disk_ro(struct gendisk *disk, int flag) struct disk_part_iter piter; struct hd_struct *part; - if (disk->part0.policy != flag) { + if (disk->part0.bdev->bd_read_only != flag) { set_disk_ro_uevent(disk, flag); - disk->part0.policy = flag; + disk->part0.bdev->bd_read_only = flag; } disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) - part->policy = flag; + part->bdev->bd_read_only = flag; disk_part_iter_exit(&piter); } @@ -1704,7 +1704,7 @@ int bdev_read_only(struct block_device *bdev) { if (!bdev) return 0; - return bdev->bd_part->policy; + return bdev->bd_read_only; } EXPORT_SYMBOL(bdev_read_only); diff --git a/block/ioctl.c b/block/ioctl.c index a6d8171221c7..d61d652078f4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -345,7 +345,7 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, if (ret) return ret; } - bdev->bd_part->policy = n; + bdev->bd_read_only = n; return 0; } diff --git a/block/partitions/core.c b/block/partitions/core.c index c068471fa654..060c1be13cd8 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -199,7 +199,7 @@ static ssize_t part_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%d\n", p->policy ? 1 : 0); + return sprintf(buf, "%d\n", p->bdev->bd_read_only); } static ssize_t part_alignment_offset_show(struct device *dev, @@ -420,7 +420,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, bdev->bd_start_sect = start; bdev_set_nr_sectors(bdev, len); p->partno = partno; - p->policy = get_disk_ro(disk); + bdev->bd_read_only = get_disk_ro(disk); if (info) { err = -ENOMEM; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index b237f1e40814..758cf71c9aa2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -23,6 +23,7 @@ struct block_device { sector_t bd_start_sect; struct disk_stats __percpu *bd_stats; unsigned long bd_stamp; + bool bd_read_only; /* read-only policy */ dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 349cf6403ccd..dcbf9ef7610e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -55,7 +55,7 @@ struct hd_struct { struct block_device *bdev; struct device __dev; - int policy, partno; + int partno; struct rcu_work rcu_work; }; @@ -278,7 +278,7 @@ extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0.policy; + return disk->part0.bdev->bd_read_only; } extern void disk_block_events(struct gendisk *disk); -- cgit v1.2.3 From cb8432d650fe3be58bb962bc8e602dc405510327 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 18:47:17 +0100 Subject: block: allocate struct hd_struct as part of struct bdev_inode Allocate hd_struct together with struct block_device to pre-load the lifetime rule changes in preparation of merging the two structures. Note that part0 was previously embedded into struct gendisk, but is a separate allocation now, and already points to the block_device instead of the hd_struct. The lifetime of struct gendisk is still controlled by the struct device embedded in the part0 hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-core.c | 16 ++++----- block/blk-flush.c | 2 +- block/blk-merge.c | 2 -- block/blk.h | 21 ------------ block/genhd.c | 50 +++++++++++----------------- block/partitions/core.c | 67 ++++---------------------------------- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_worker.c | 3 +- drivers/block/zram/zram_drv.c | 2 +- drivers/md/dm.c | 4 +-- drivers/md/md.c | 2 +- fs/block_dev.c | 39 +++++++--------------- include/linux/blk_types.h | 2 +- include/linux/genhd.h | 14 ++++---- include/linux/part_stat.h | 4 +-- 15 files changed, 61 insertions(+), 169 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index d64ffcb6f9ae..9ea70275fc1c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -714,7 +714,8 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) static noinline int should_fail_bio(struct bio *bio) { - if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size)) + if (should_fail_request(bio->bi_disk->part0->bd_part, + bio->bi_iter.bi_size)) return -EIO; return 0; } @@ -831,7 +832,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) if (unlikely(blk_partition_remap(bio))) goto end_io; } else { - if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) + if (unlikely(bio_check_ro(bio, bio->bi_disk->part0->bd_part))) goto end_io; if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk)))) goto end_io; @@ -1203,7 +1204,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * return ret; if (rq->rq_disk && - should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) + should_fail_request(rq->rq_disk->part0->bd_part, blk_rq_bytes(rq))) return BLK_STS_IOERR; if (blk_crypto_insert_cloned_request(rq)) @@ -1272,7 +1273,7 @@ again: __part_stat_add(part, io_ticks, end ? now - stamp : 1); } if (part->partno) { - part = &part_to_disk(part)->part0; + part = part_to_disk(part)->part0->bd_part; goto again; } } @@ -1309,8 +1310,6 @@ void blk_account_io_done(struct request *req, u64 now) part_stat_inc(part, ios[sgrp]); part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); part_stat_unlock(); - - hd_struct_put(part); } } @@ -1354,7 +1353,7 @@ EXPORT_SYMBOL_GPL(part_start_io_acct); unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op) { - return __part_start_io_acct(&disk->part0, sectors, op); + return __part_start_io_acct(disk->part0->bd_part, sectors, op); } EXPORT_SYMBOL(disk_start_io_acct); @@ -1376,14 +1375,13 @@ void part_end_io_acct(struct hd_struct *part, struct bio *bio, unsigned long start_time) { __part_end_io_acct(part, bio_op(bio), start_time); - hd_struct_put(part); } EXPORT_SYMBOL_GPL(part_end_io_acct); void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time) { - __part_end_io_acct(&disk->part0, op, start_time); + __part_end_io_acct(disk->part0->bd_part, op, start_time); } EXPORT_SYMBOL(disk_end_io_acct); diff --git a/block/blk-flush.c b/block/blk-flush.c index e32958f0b687..fcd0a60574df 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -139,7 +139,7 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front) static void blk_account_io_flush(struct request *rq) { - struct hd_struct *part = &rq->rq_disk->part0; + struct hd_struct *part = rq->rq_disk->part0->bd_part; part_stat_lock(); part_stat_inc(part, ios[STAT_FLUSH]); diff --git a/block/blk-merge.c b/block/blk-merge.c index bcf5e4580603..cb351ab9b77d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -683,8 +683,6 @@ static void blk_account_io_merge_request(struct request *req) part_stat_lock(); part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); part_stat_unlock(); - - hd_struct_put(req->part); } } diff --git a/block/blk.h b/block/blk.h index 0bd4b58bcbaf..32ac41f7557f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -363,27 +363,6 @@ int bdev_del_partition(struct block_device *bdev, int partno); int bdev_resize_partition(struct block_device *bdev, int partno, sector_t start, sector_t length); int disk_expand_part_tbl(struct gendisk *disk, int target); -int hd_ref_init(struct hd_struct *part); - -/* no need to get/put refcount of part0 */ -static inline int hd_struct_try_get(struct hd_struct *part) -{ - if (part->partno) - return percpu_ref_tryget_live(&part->ref); - return 1; -} - -static inline void hd_struct_put(struct hd_struct *part) -{ - if (part->partno) - percpu_ref_put(&part->ref); -} - -static inline void hd_free_part(struct hd_struct *part) -{ - bdput(part->bdev); - percpu_ref_exit(&part->ref); -} int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, diff --git a/block/genhd.c b/block/genhd.c index 2db1204920a9..c35b03dac5e5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -42,7 +42,7 @@ static void disk_release_events(struct gendisk *disk); void set_capacity(struct gendisk *disk, sector_t sectors) { - struct block_device *bdev = disk->part0.bdev; + struct block_device *bdev = disk->part0; spin_lock(&bdev->bd_size_lock); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); @@ -318,9 +318,7 @@ static inline int sector_in_part(struct hd_struct *part, sector_t sector) * primarily used for stats accounting. * * CONTEXT: - * RCU read locked. The returned partition pointer is always valid - * because its refcount is grabbed except for part0, which lifetime - * is same with the disk. + * RCU read locked. * * RETURNS: * Found partition on success, part0 is returned if no partition matches @@ -336,26 +334,19 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) ptbl = rcu_dereference(disk->part_tbl); part = rcu_dereference(ptbl->last_lookup); - if (part && sector_in_part(part, sector) && hd_struct_try_get(part)) + if (part && sector_in_part(part, sector)) goto out_unlock; for (i = 1; i < ptbl->len; i++) { part = rcu_dereference(ptbl->part[i]); if (part && sector_in_part(part, sector)) { - /* - * only live partition can be cached for lookup, - * so use-after-free on cached & deleting partition - * can be avoided - */ - if (!hd_struct_try_get(part)) - break; rcu_assign_pointer(ptbl->last_lookup, part); goto out_unlock; } } - part = &disk->part0; + part = disk->part0->bd_part; out_unlock: rcu_read_unlock(); return part; @@ -681,8 +672,8 @@ static void register_disk(struct device *parent, struct gendisk *disk, */ pm_runtime_set_memalloc_noio(ddev, true); - disk->part0.bdev->bd_holder_dir = - kobject_create_and_add("holders", &ddev->kobj); + disk->part0->bd_holder_dir = + kobject_create_and_add("holders", &ddev->kobj); disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); if (disk->flags & GENHD_FL_HIDDEN) { @@ -748,7 +739,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, disk->flags |= GENHD_FL_UP; - retval = blk_alloc_devt(&disk->part0, &devt); + retval = blk_alloc_devt(disk->part0->bd_part, &devt); if (retval) { WARN_ON(1); return; @@ -775,7 +766,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt)); WARN_ON(ret); bdi_set_owner(bdi, dev); - bdev_add(disk->part0.bdev, devt); + bdev_add(disk->part0, devt); } register_disk(parent, disk, groups); if (register_queue) @@ -888,11 +879,11 @@ void del_gendisk(struct gendisk *disk) blk_unregister_queue(disk); - kobject_put(disk->part0.bdev->bd_holder_dir); + kobject_put(disk->part0->bd_holder_dir); kobject_put(disk->slave_dir); - part_stat_set_all(&disk->part0, 0); - disk->part0.bdev->bd_stamp = 0; + part_stat_set_all(disk->part0->bd_part, 0); + disk->part0->bd_stamp = 0; if (!sysfs_deprecated) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); @@ -1005,7 +996,7 @@ void __init printk_all_partitions(void) */ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) { - bool is_part0 = part == &disk->part0; + bool is_part0 = part == disk->part0->bd_part; printk("%s%s %10llu %s %s", is_part0 ? "" : " ", bdevt_str(part_devt(part), devt_buf), @@ -1460,7 +1451,7 @@ static void disk_release(struct device *dev) disk_release_events(disk); kfree(disk->random); disk_replace_part_tbl(disk, NULL); - hd_free_part(&disk->part0); + bdput(disk->part0); if (disk->queue) blk_put_queue(disk->queue); kfree(disk); @@ -1626,8 +1617,8 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) if (!disk) return NULL; - disk->part0.bdev = bdev_alloc(disk, 0); - if (!disk->part0.bdev) + disk->part0 = bdev_alloc(disk, 0); + if (!disk->part0) goto out_free_disk; disk->node_id = node_id; @@ -1635,10 +1626,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) goto out_bdput; ptbl = rcu_dereference_protected(disk->part_tbl, 1); - rcu_assign_pointer(ptbl->part[0], &disk->part0); - - if (hd_ref_init(&disk->part0)) - goto out_bdput; + rcu_assign_pointer(ptbl->part[0], disk->part0->bd_part); disk->minors = minors; rand_initialize_disk(disk); @@ -1648,7 +1636,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) return disk; out_bdput: - bdput(disk->part0.bdev); + bdput(disk->part0); out_free_disk: kfree(disk); return NULL; @@ -1687,9 +1675,9 @@ void set_disk_ro(struct gendisk *disk, int flag) struct disk_part_iter piter; struct hd_struct *part; - if (disk->part0.bdev->bd_read_only != flag) { + if (disk->part0->bd_read_only != flag) { set_disk_ro_uevent(disk, flag); - disk->part0.bdev->bd_read_only = flag; + disk->part0->bd_read_only = flag; } disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); diff --git a/block/partitions/core.c b/block/partitions/core.c index 060c1be13cd8..6d1fca193cbd 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -265,9 +265,9 @@ static const struct attribute_group *part_attr_groups[] = { static void part_release(struct device *dev) { struct hd_struct *p = dev_to_part(dev); + blk_free_devt(dev->devt); - hd_free_part(p); - kfree(p); + bdput(p->bdev); } static int part_uevent(struct device *dev, struct kobj_uevent_env *env) @@ -288,46 +288,6 @@ struct device_type part_type = { .uevent = part_uevent, }; -static void hd_struct_free_work(struct work_struct *work) -{ - struct hd_struct *part = - container_of(to_rcu_work(work), struct hd_struct, rcu_work); - struct gendisk *disk = part_to_disk(part); - - /* - * Release the disk reference acquired in delete_partition here. - * We can't release it in hd_struct_free because the final put_device - * needs process context and thus can't be run directly from a - * percpu_ref ->release handler. - */ - put_device(disk_to_dev(disk)); - - part->bdev->bd_start_sect = 0; - bdev_set_nr_sectors(part->bdev, 0); - part_stat_set_all(part, 0); - put_device(part_to_dev(part)); -} - -static void hd_struct_free(struct percpu_ref *ref) -{ - struct hd_struct *part = container_of(ref, struct hd_struct, ref); - struct gendisk *disk = part_to_disk(part); - struct disk_part_tbl *ptbl = - rcu_dereference_protected(disk->part_tbl, 1); - - rcu_assign_pointer(ptbl->last_lookup, NULL); - - INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work); - queue_rcu_work(system_wq, &part->rcu_work); -} - -int hd_ref_init(struct hd_struct *part) -{ - if (percpu_ref_init(&part->ref, hd_struct_free, 0, GFP_KERNEL)) - return -ENOMEM; - return 0; -} - /* * Must be called either with bd_mutex held, before a disk can be opened or * after all disk users are gone. @@ -342,8 +302,8 @@ void delete_partition(struct hd_struct *part) * ->part_tbl is referenced in this part's release handler, so * we have to hold the disk device */ - get_device(disk_to_dev(disk)); rcu_assign_pointer(ptbl->part[part->partno], NULL); + rcu_assign_pointer(ptbl->last_lookup, NULL); kobject_put(part->bdev->bd_holder_dir); device_del(part_to_dev(part)); @@ -353,7 +313,7 @@ void delete_partition(struct hd_struct *part) */ remove_inode_hash(part->bdev->bd_inode); - percpu_ref_kill(&part->ref); + put_device(part_to_dev(part)); } static ssize_t whole_disk_show(struct device *dev, @@ -406,15 +366,11 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, if (ptbl->part[partno]) return ERR_PTR(-EBUSY); - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return ERR_PTR(-EBUSY); - bdev = bdev_alloc(disk, partno); if (!bdev) - goto out_free; - p->bdev = bdev; + return ERR_PTR(-ENOMEM); + p = bdev->bd_part; pdev = part_to_dev(p); bdev->bd_start_sect = start; @@ -463,13 +419,6 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_del; } - err = hd_ref_init(p); - if (err) { - if (flags & ADDPART_FLAG_WHOLEDISK) - goto out_remove_file; - goto out_del; - } - /* everything is up and running, commence */ bdev_add(bdev, devt); rcu_assign_pointer(ptbl->part[partno], p); @@ -481,11 +430,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, out_bdput: bdput(bdev); -out_free: - kfree(p); return ERR_PTR(err); -out_remove_file: - device_remove_file(pdev, &dev_attr_whole_disk); out_del: kobject_put(bdev->bd_holder_dir); device_del(pdev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index dc333dbe5232..9e5c2fdfda36 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2802,7 +2802,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) if (c_min_rate == 0) return false; - curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - + curr_events = (int)part_stat_read_accum(disk->part0->bd_part, sectors) - atomic_read(&device->rs_sect_ev); if (atomic_read(&device->ap_actlog_cnt) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ba56f3f05312..343f56b86bb7 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1678,7 +1678,8 @@ void drbd_rs_controller_reset(struct drbd_device *device) atomic_set(&device->rs_sect_in, 0); atomic_set(&device->rs_sect_ev, 0); device->rs_in_flight = 0; - device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors); + device->rs_last_events = + (int)part_stat_read_accum(disk->part0->bd_part, sectors); /* Updating the RCU protected object in place is necessary since this function gets called from atomic context. diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index b5f68951c9d2..6d84876a9cd0 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1687,7 +1687,7 @@ static void zram_reset_device(struct zram *zram) zram->disksize = 0; set_capacity_and_notify(zram->disk, 0); - part_stat_set_all(&zram->disk->part0, 0); + part_stat_set_all(zram->disk->part0->bd_part, 0); up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 48051db006f3..1b2db4d530ea 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1607,7 +1607,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, * (by eliminating DM's splitting and just using bio_split) */ part_stat_lock(); - __dm_part_stat_sub(&dm_disk(md)->part0, + __dm_part_stat_sub(dm_disk(md)->part0->bd_part, sectors[op_stat_group(bio_op(bio))], ci.sector_count); part_stat_unlock(); @@ -2242,7 +2242,7 @@ EXPORT_SYMBOL_GPL(dm_put); static bool md_in_flight_bios(struct mapped_device *md) { int cpu; - struct hd_struct *part = &dm_disk(md)->part0; + struct hd_struct *part = dm_disk(md)->part0->bd_part; long sum = 0; for_each_possible_cpu(cpu) { diff --git a/drivers/md/md.c b/drivers/md/md.c index 7ce6047c856e..3696c2d77a4d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8441,7 +8441,7 @@ static int is_mddev_idle(struct mddev *mddev, int init) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_disk; - curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - + curr_events = (int)part_stat_read_accum(disk->part0->bd_part, sectors) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and diff --git a/fs/block_dev.c b/fs/block_dev.c index 381c22426f43..61cf33b6284f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -39,6 +39,7 @@ struct bdev_inode { struct block_device bdev; + struct hd_struct hd; struct inode vfs_inode; }; @@ -886,6 +887,9 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) iput(inode); return NULL; } + bdev->bd_part = &BDEV_I(inode)->hd; + memset(bdev->bd_part, 0, sizeof(*bdev->bd_part)); + bdev->bd_part->bdev = bdev; return bdev; } @@ -1280,15 +1284,10 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed); static int __blkdev_get(struct block_device *bdev, fmode_t mode) { struct gendisk *disk = bdev->bd_disk; - int ret; + int ret = 0; if (!bdev->bd_openers) { if (!bdev_is_partition(bdev)) { - ret = -ENXIO; - bdev->bd_part = disk_get_part(disk, 0); - if (!bdev->bd_part) - goto out_clear; - ret = 0; if (disk->fops->open) ret = disk->fops->open(bdev, mode); @@ -1307,7 +1306,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) - goto out_clear; + return ret; } else { struct block_device *whole = bdget_disk(disk, 0); @@ -1316,18 +1315,16 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) if (ret) { mutex_unlock(&whole->bd_mutex); bdput(whole); - goto out_clear; + return ret; } whole->bd_part_count++; mutex_unlock(&whole->bd_mutex); - bdev->bd_part = disk_get_part(disk, bdev->bd_partno); if (!(disk->flags & GENHD_FL_UP) || - !bdev->bd_part || !bdev_nr_sectors(bdev)) { + !bdev_nr_sectors(bdev)) { __blkdev_put(whole, mode, 1); bdput(whole); - ret = -ENXIO; - goto out_clear; + return -ENXIO; } set_init_blocksize(bdev); } @@ -1336,7 +1333,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); } else { if (!bdev_is_partition(bdev)) { - ret = 0; if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ @@ -1349,11 +1345,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) } bdev->bd_openers++; return 0; - - out_clear: - disk_put_part(bdev->bd_part); - bdev->bd_part = NULL; - return ret; } struct block_device *blkdev_get_no_open(dev_t dev) @@ -1580,18 +1571,12 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) sync_blockdev(bdev); kill_bdev(bdev); bdev_write_inode(bdev); - - if (!bdev_is_partition(bdev) && disk->fops->release) - disk->fops->release(disk, mode); - - disk_put_part(bdev->bd_part); - bdev->bd_part = NULL; if (bdev_is_partition(bdev)) victim = bdev_whole(bdev); - } else { - if (!bdev_is_partition(bdev) && disk->fops->release) - disk->fops->release(disk, mode); } + + if (!bdev_is_partition(bdev) && disk->fops->release) + disk->fops->release(disk, mode); mutex_unlock(&bdev->bd_mutex); if (victim) { __blkdev_put(victim, mode, 1); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 758cf71c9aa2..6edea5c16259 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -59,7 +59,7 @@ struct block_device { } __randomize_layout; #define bdev_whole(_bdev) \ - ((_bdev)->bd_disk->part0.bdev) + ((_bdev)->bd_disk->part0) #define bdev_kobj(_bdev) \ (&part_to_dev((_bdev)->bd_part)->kobj) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index dcbf9ef7610e..df7319da013c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -19,11 +19,12 @@ #include #include -#define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) #define dev_to_part(device) container_of((device), struct hd_struct, __dev) -#define disk_to_dev(disk) (&(disk)->part0.__dev) #define part_to_dev(part) (&((part)->__dev)) +#define dev_to_disk(device) (dev_to_part(device)->bdev->bd_disk) +#define disk_to_dev(disk) (part_to_dev((disk)->part0->bd_part)) + extern const struct device_type disk_type; extern struct device_type part_type; extern struct class block_class; @@ -51,12 +52,9 @@ struct partition_meta_info { }; struct hd_struct { - struct percpu_ref ref; - struct block_device *bdev; struct device __dev; int partno; - struct rcu_work rcu_work; }; /** @@ -168,7 +166,7 @@ struct gendisk { * helpers. */ struct disk_part_tbl __rcu *part_tbl; - struct hd_struct part0; + struct block_device *part0; const struct block_device_operations *fops; struct request_queue *queue; @@ -278,7 +276,7 @@ extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0.bdev->bd_read_only; + return disk->part0->bd_read_only; } extern void disk_block_events(struct gendisk *disk); @@ -302,7 +300,7 @@ static inline sector_t bdev_nr_sectors(struct block_device *bdev) static inline sector_t get_capacity(struct gendisk *disk) { - return bdev_nr_sectors(disk->part0.bdev); + return bdev_nr_sectors(disk->part0); } int bdev_disk_changed(struct block_device *bdev, bool invalidate); diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 87ad60106e1d..680de036691e 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -59,8 +59,8 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ if ((part)->partno) \ - __part_stat_add(&part_to_disk((part))->part0, \ - field, addnd); \ + __part_stat_add(part_to_disk((part))->part0->bd_part, \ + field, addnd); \ } while (0) #define part_stat_dec(part, field) \ -- cgit v1.2.3 From 8446fe9255be821cb38ffd306d7e8edc4b9ea662 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:36:54 +0100 Subject: block: switch partition lookup to use struct block_device Use struct block_device to lookup partitions on a disk. This removes all usage of struct hd_struct from the I/O path. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Coly Li [bcache] Acked-by: Chao Yu [f2fs] Signed-off-by: Jens Axboe --- block/bio.c | 4 +-- block/blk-core.c | 66 +++++++++++++++++--------------------- block/blk-flush.c | 2 +- block/blk-mq.c | 9 +++--- block/blk-mq.h | 7 ++-- block/blk.h | 4 +-- block/genhd.c | 57 +++++++++++++++++--------------- block/partitions/core.c | 7 ++-- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_worker.c | 2 +- drivers/block/zram/zram_drv.c | 2 +- drivers/md/bcache/request.c | 4 +-- drivers/md/dm.c | 4 +-- drivers/md/md.c | 4 +-- drivers/nvme/target/admin-cmd.c | 20 ++++++------ fs/ext4/super.c | 18 ++++------- fs/ext4/sysfs.c | 10 ++---- fs/f2fs/f2fs.h | 2 +- fs/f2fs/super.c | 6 ++-- include/linux/blkdev.h | 8 ++--- include/linux/genhd.h | 4 +-- include/linux/part_stat.h | 17 +++++----- 22 files changed, 122 insertions(+), 137 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 669bb47a3198..ebb18136b86f 100644 --- a/block/bio.c +++ b/block/bio.c @@ -608,12 +608,12 @@ void bio_truncate(struct bio *bio, unsigned new_size) void guard_bio_eod(struct bio *bio) { sector_t maxsector; - struct hd_struct *part; + struct block_device *part; rcu_read_lock(); part = __disk_get_part(bio->bi_disk, bio->bi_partno); if (part) - maxsector = bdev_nr_sectors(part->bdev); + maxsector = bdev_nr_sectors(part); else maxsector = get_capacity(bio->bi_disk); rcu_read_unlock(); diff --git a/block/blk-core.c b/block/blk-core.c index 9ea70275fc1c..cee568389b7e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -666,10 +666,9 @@ static int __init setup_fail_make_request(char *str) } __setup("fail_make_request=", setup_fail_make_request); -static bool should_fail_request(struct hd_struct *part, unsigned int bytes) +static bool should_fail_request(struct block_device *part, unsigned int bytes) { - return part->bdev->bd_make_it_fail && - should_fail(&fail_make_request, bytes); + return part->bd_make_it_fail && should_fail(&fail_make_request, bytes); } static int __init fail_make_request_debugfs(void) @@ -684,7 +683,7 @@ late_initcall(fail_make_request_debugfs); #else /* CONFIG_FAIL_MAKE_REQUEST */ -static inline bool should_fail_request(struct hd_struct *part, +static inline bool should_fail_request(struct block_device *part, unsigned int bytes) { return false; @@ -692,11 +691,11 @@ static inline bool should_fail_request(struct hd_struct *part, #endif /* CONFIG_FAIL_MAKE_REQUEST */ -static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) +static inline bool bio_check_ro(struct bio *bio, struct block_device *part) { const int op = bio_op(bio); - if (part->bdev->bd_read_only && op_is_write(op)) { + if (part->bd_read_only && op_is_write(op)) { char b[BDEVNAME_SIZE]; if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) @@ -704,7 +703,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) WARN_ONCE(1, "Trying to write to read-only block-device %s (partno %d)\n", - bio_devname(bio, b), part->partno); + bio_devname(bio, b), part->bd_partno); /* Older lvm-tools actually trigger this */ return false; } @@ -714,8 +713,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) static noinline int should_fail_bio(struct bio *bio) { - if (should_fail_request(bio->bi_disk->part0->bd_part, - bio->bi_iter.bi_size)) + if (should_fail_request(bio->bi_disk->part0, bio->bi_iter.bi_size)) return -EIO; return 0; } @@ -744,7 +742,7 @@ static inline int bio_check_eod(struct bio *bio, sector_t maxsector) */ static inline int blk_partition_remap(struct bio *bio) { - struct hd_struct *p; + struct block_device *p; int ret = -EIO; rcu_read_lock(); @@ -757,12 +755,12 @@ static inline int blk_partition_remap(struct bio *bio) goto out; if (bio_sectors(bio)) { - if (bio_check_eod(bio, bdev_nr_sectors(p->bdev))) + if (bio_check_eod(bio, bdev_nr_sectors(p))) goto out; - bio->bi_iter.bi_sector += p->bdev->bd_start_sect; - trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), + bio->bi_iter.bi_sector += p->bd_start_sect; + trace_block_bio_remap(bio->bi_disk->queue, bio, p->bd_dev, bio->bi_iter.bi_sector - - p->bdev->bd_start_sect); + p->bd_start_sect); } bio->bi_partno = 0; ret = 0; @@ -832,7 +830,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) if (unlikely(blk_partition_remap(bio))) goto end_io; } else { - if (unlikely(bio_check_ro(bio, bio->bi_disk->part0->bd_part))) + if (unlikely(bio_check_ro(bio, bio->bi_disk->part0))) goto end_io; if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk)))) goto end_io; @@ -1204,7 +1202,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * return ret; if (rq->rq_disk && - should_fail_request(rq->rq_disk->part0->bd_part, blk_rq_bytes(rq))) + should_fail_request(rq->rq_disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; if (blk_crypto_insert_cloned_request(rq)) @@ -1263,17 +1261,18 @@ unsigned int blk_rq_err_bytes(const struct request *rq) } EXPORT_SYMBOL_GPL(blk_rq_err_bytes); -static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) +static void update_io_ticks(struct block_device *part, unsigned long now, + bool end) { unsigned long stamp; again: - stamp = READ_ONCE(part->bdev->bd_stamp); + stamp = READ_ONCE(part->bd_stamp); if (unlikely(stamp != now)) { - if (likely(cmpxchg(&part->bdev->bd_stamp, stamp, now) == stamp)) + if (likely(cmpxchg(&part->bd_stamp, stamp, now) == stamp)) __part_stat_add(part, io_ticks, end ? now - stamp : 1); } - if (part->partno) { - part = part_to_disk(part)->part0->bd_part; + if (part->bd_partno) { + part = bdev_whole(part); goto again; } } @@ -1282,11 +1281,9 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) { if (req->part && blk_do_io_stat(req)) { const int sgrp = op_stat_group(req_op(req)); - struct hd_struct *part; part_stat_lock(); - part = req->part; - part_stat_add(part, sectors[sgrp], bytes >> 9); + part_stat_add(req->part, sectors[sgrp], bytes >> 9); part_stat_unlock(); } } @@ -1301,14 +1298,11 @@ void blk_account_io_done(struct request *req, u64 now) if (req->part && blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { const int sgrp = op_stat_group(req_op(req)); - struct hd_struct *part; part_stat_lock(); - part = req->part; - - update_io_ticks(part, jiffies, true); - part_stat_inc(part, ios[sgrp]); - part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); + update_io_ticks(req->part, jiffies, true); + part_stat_inc(req->part, ios[sgrp]); + part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); part_stat_unlock(); } } @@ -1325,7 +1319,7 @@ void blk_account_io_start(struct request *rq) part_stat_unlock(); } -static unsigned long __part_start_io_acct(struct hd_struct *part, +static unsigned long __part_start_io_acct(struct block_device *part, unsigned int sectors, unsigned int op) { const int sgrp = op_stat_group(op); @@ -1341,7 +1335,7 @@ static unsigned long __part_start_io_acct(struct hd_struct *part, return now; } -unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, +unsigned long part_start_io_acct(struct gendisk *disk, struct block_device **part, struct bio *bio) { *part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector); @@ -1353,11 +1347,11 @@ EXPORT_SYMBOL_GPL(part_start_io_acct); unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op) { - return __part_start_io_acct(disk->part0->bd_part, sectors, op); + return __part_start_io_acct(disk->part0, sectors, op); } EXPORT_SYMBOL(disk_start_io_acct); -static void __part_end_io_acct(struct hd_struct *part, unsigned int op, +static void __part_end_io_acct(struct block_device *part, unsigned int op, unsigned long start_time) { const int sgrp = op_stat_group(op); @@ -1371,7 +1365,7 @@ static void __part_end_io_acct(struct hd_struct *part, unsigned int op, part_stat_unlock(); } -void part_end_io_acct(struct hd_struct *part, struct bio *bio, +void part_end_io_acct(struct block_device *part, struct bio *bio, unsigned long start_time) { __part_end_io_acct(part, bio_op(bio), start_time); @@ -1381,7 +1375,7 @@ EXPORT_SYMBOL_GPL(part_end_io_acct); void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time) { - __part_end_io_acct(disk->part0->bd_part, op, start_time); + __part_end_io_acct(disk->part0, op, start_time); } EXPORT_SYMBOL(disk_end_io_acct); diff --git a/block/blk-flush.c b/block/blk-flush.c index fcd0a60574df..9507dcdd5881 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -139,7 +139,7 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front) static void blk_account_io_flush(struct request *rq) { - struct hd_struct *part = rq->rq_disk->part0->bd_part; + struct block_device *part = rq->rq_disk->part0; part_stat_lock(); part_stat_inc(part, ios[STAT_FLUSH]); diff --git a/block/blk-mq.c b/block/blk-mq.c index 55bcee5dc032..a2593748fa53 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -95,7 +95,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, } struct mq_inflight { - struct hd_struct *part; + struct block_device *part; unsigned int inflight[2]; }; @@ -111,7 +111,8 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, return true; } -unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part) +unsigned int blk_mq_in_flight(struct request_queue *q, + struct block_device *part) { struct mq_inflight mi = { .part = part }; @@ -120,8 +121,8 @@ unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part) return mi.inflight[0] + mi.inflight[1]; } -void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]) +void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, + unsigned int inflight[2]) { struct mq_inflight mi = { .part = part }; diff --git a/block/blk-mq.h b/block/blk-mq.h index a52703c98b77..c696515766c7 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -182,9 +182,10 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx) return hctx->nr_ctx && hctx->tags; } -unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part); -void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]); +unsigned int blk_mq_in_flight(struct request_queue *q, + struct block_device *part); +void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, + unsigned int inflight[2]); static inline void blk_mq_put_dispatch_budget(struct request_queue *q) { diff --git a/block/blk.h b/block/blk.h index 32ac41f7557f..d5bf8f3a0781 100644 --- a/block/blk.h +++ b/block/blk.h @@ -215,7 +215,7 @@ static inline void elevator_exit(struct request_queue *q, __elevator_exit(q, e); } -struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); +struct block_device *__disk_get_part(struct gendisk *disk, int partno); ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); @@ -348,7 +348,7 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q); static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {} #endif -struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); +struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); int blk_alloc_devt(struct hd_struct *part, dev_t *devt); void blk_free_devt(dev_t devt); diff --git a/block/genhd.c b/block/genhd.c index c35b03dac5e5..ed06466b305d 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -126,7 +126,7 @@ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) } } -static unsigned int part_in_flight(struct hd_struct *part) +static unsigned int part_in_flight(struct block_device *part) { unsigned int inflight = 0; int cpu; @@ -141,7 +141,8 @@ static unsigned int part_in_flight(struct hd_struct *part) return inflight; } -static void part_in_flight_rw(struct hd_struct *part, unsigned int inflight[2]) +static void part_in_flight_rw(struct block_device *part, + unsigned int inflight[2]) { int cpu; @@ -157,7 +158,7 @@ static void part_in_flight_rw(struct hd_struct *part, unsigned int inflight[2]) inflight[1] = 0; } -struct hd_struct *__disk_get_part(struct gendisk *disk, int partno) +struct block_device *__disk_get_part(struct gendisk *disk, int partno) { struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl); @@ -182,15 +183,21 @@ struct hd_struct *__disk_get_part(struct gendisk *disk, int partno) */ struct hd_struct *disk_get_part(struct gendisk *disk, int partno) { + struct block_device *bdev; struct hd_struct *part; rcu_read_lock(); - part = __disk_get_part(disk, partno); - if (part) - get_device(part_to_dev(part)); + bdev = __disk_get_part(disk, partno); + if (!bdev) + goto fail; + part = bdev->bd_part; + if (!kobject_get_unless_zero(&part_to_dev(part)->kobj)) + goto fail; rcu_read_unlock(); - return part; +fail: + rcu_read_unlock(); + return NULL; } /** @@ -264,19 +271,19 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) /* iterate to the next partition */ for (; piter->idx != end; piter->idx += inc) { - struct hd_struct *part; + struct block_device *part; part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; - if (!bdev_nr_sectors(part->bdev) && + if (!bdev_nr_sectors(part) && !(piter->flags & DISK_PITER_INCL_EMPTY) && !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && piter->idx == 0)) continue; - get_device(part_to_dev(part)); - piter->part = part; + get_device(part_to_dev(part->bd_part)); + piter->part = part->bd_part; piter->idx += inc; break; } @@ -303,10 +310,10 @@ void disk_part_iter_exit(struct disk_part_iter *piter) } EXPORT_SYMBOL_GPL(disk_part_iter_exit); -static inline int sector_in_part(struct hd_struct *part, sector_t sector) +static inline int sector_in_part(struct block_device *part, sector_t sector) { - return part->bdev->bd_start_sect <= sector && - sector < part->bdev->bd_start_sect + bdev_nr_sectors(part->bdev); + return part->bd_start_sect <= sector && + sector < part->bd_start_sect + bdev_nr_sectors(part); } /** @@ -324,10 +331,10 @@ static inline int sector_in_part(struct hd_struct *part, sector_t sector) * Found partition on success, part0 is returned if no partition matches * or the matched partition is being deleted. */ -struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) +struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) { struct disk_part_tbl *ptbl; - struct hd_struct *part; + struct block_device *part; int i; rcu_read_lock(); @@ -346,7 +353,7 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) } } - part = disk->part0->bd_part; + part = disk->part0; out_unlock: rcu_read_unlock(); return part; @@ -882,7 +889,7 @@ void del_gendisk(struct gendisk *disk) kobject_put(disk->part0->bd_holder_dir); kobject_put(disk->slave_dir); - part_stat_set_all(disk->part0->bd_part, 0); + part_stat_set_all(disk->part0, 0); disk->part0->bd_stamp = 0; if (!sysfs_deprecated) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); @@ -1189,9 +1196,9 @@ ssize_t part_stat_show(struct device *dev, part_stat_read_all(p, &stat); if (queue_is_mq(q)) - inflight = blk_mq_in_flight(q, p); + inflight = blk_mq_in_flight(q, p->bdev); else - inflight = part_in_flight(p); + inflight = part_in_flight(p->bdev); return sprintf(buf, "%8lu %8lu %8llu %8u " @@ -1231,9 +1238,9 @@ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, unsigned int inflight[2]; if (queue_is_mq(q)) - blk_mq_in_flight_rw(q, p, inflight); + blk_mq_in_flight_rw(q, p->bdev, inflight); else - part_in_flight_rw(p, inflight); + part_in_flight_rw(p->bdev, inflight); return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); } @@ -1506,9 +1513,9 @@ static int diskstats_show(struct seq_file *seqf, void *v) while ((hd = disk_part_iter_next(&piter))) { part_stat_read_all(hd, &stat); if (queue_is_mq(gp->queue)) - inflight = blk_mq_in_flight(gp->queue, hd); + inflight = blk_mq_in_flight(gp->queue, hd->bdev); else - inflight = part_in_flight(hd); + inflight = part_in_flight(hd->bdev); seq_printf(seqf, "%4d %7d %s " "%lu %lu %lu %u " @@ -1626,7 +1633,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) goto out_bdput; ptbl = rcu_dereference_protected(disk->part_tbl, 1); - rcu_assign_pointer(ptbl->part[0], disk->part0->bd_part); + rcu_assign_pointer(ptbl->part[0], disk->part0); disk->minors = minors; rand_initialize_disk(disk); diff --git a/block/partitions/core.c b/block/partitions/core.c index 6d1fca193cbd..c2f6721633b8 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -298,12 +298,9 @@ void delete_partition(struct hd_struct *part) struct disk_part_tbl *ptbl = rcu_dereference_protected(disk->part_tbl, 1); - /* - * ->part_tbl is referenced in this part's release handler, so - * we have to hold the disk device - */ rcu_assign_pointer(ptbl->part[part->partno], NULL); rcu_assign_pointer(ptbl->last_lookup, NULL); + kobject_put(part->bdev->bd_holder_dir); device_del(part_to_dev(part)); @@ -421,7 +418,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, /* everything is up and running, commence */ bdev_add(bdev, devt); - rcu_assign_pointer(ptbl->part[partno], p); + rcu_assign_pointer(ptbl->part[partno], bdev); /* suppress uevent if the disk suppresses it */ if (!dev_get_uevent_suppress(ddev)) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9e5c2fdfda36..09c86ef3f0fd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2802,7 +2802,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) if (c_min_rate == 0) return false; - curr_events = (int)part_stat_read_accum(disk->part0->bd_part, sectors) - + curr_events = (int)part_stat_read_accum(disk->part0, sectors) - atomic_read(&device->rs_sect_ev); if (atomic_read(&device->ap_actlog_cnt) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 343f56b86bb7..02044ab7f767 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1679,7 +1679,7 @@ void drbd_rs_controller_reset(struct drbd_device *device) atomic_set(&device->rs_sect_ev, 0); device->rs_in_flight = 0; device->rs_last_events = - (int)part_stat_read_accum(disk->part0->bd_part, sectors); + (int)part_stat_read_accum(disk->part0, sectors); /* Updating the RCU protected object in place is necessary since this function gets called from atomic context. diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 6d84876a9cd0..dc8957d173d3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1687,7 +1687,7 @@ static void zram_reset_device(struct zram *zram) zram->disksize = 0; set_capacity_and_notify(zram->disk, 0); - part_stat_set_all(zram->disk->part0->bd_part, 0); + part_stat_set_all(zram->disk->part0, 0); up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index afac8d07c1bd..85b1f2a9b72d 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -475,7 +475,7 @@ struct search { unsigned int read_dirty_data:1; unsigned int cache_missed:1; - struct hd_struct *part; + struct block_device *part; unsigned long start_time; struct btree_op op; @@ -1073,7 +1073,7 @@ struct detached_dev_io_private { unsigned long start_time; bio_end_io_t *bi_end_io; void *bi_private; - struct hd_struct *part; + struct block_device *part; }; static void detached_dev_end_io(struct bio *bio) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1b2db4d530ea..176adcff56b3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1607,7 +1607,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, * (by eliminating DM's splitting and just using bio_split) */ part_stat_lock(); - __dm_part_stat_sub(dm_disk(md)->part0->bd_part, + __dm_part_stat_sub(dm_disk(md)->part0, sectors[op_stat_group(bio_op(bio))], ci.sector_count); part_stat_unlock(); @@ -2242,7 +2242,7 @@ EXPORT_SYMBOL_GPL(dm_put); static bool md_in_flight_bios(struct mapped_device *md) { int cpu; - struct hd_struct *part = dm_disk(md)->part0->bd_part; + struct block_device *part = dm_disk(md)->part0; long sum = 0; for_each_possible_cpu(cpu) { diff --git a/drivers/md/md.c b/drivers/md/md.c index 3696c2d77a4d..0065736f05b4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -464,7 +464,7 @@ struct md_io { bio_end_io_t *orig_bi_end_io; void *orig_bi_private; unsigned long start_time; - struct hd_struct *part; + struct block_device *part; }; static void md_end_io(struct bio *bio) @@ -8441,7 +8441,7 @@ static int is_mddev_idle(struct mddev *mddev, int init) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_disk; - curr_events = (int)part_stat_read_accum(disk->part0->bd_part, sectors) - + curr_events = (int)part_stat_read_accum(disk->part0, sectors) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index dca34489a1dc..8d90235e4fcc 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -89,12 +89,12 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, if (!ns->bdev) goto out; - host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]); - data_units_read = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part, - sectors[READ]), 1000); - host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]); - data_units_written = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part, - sectors[WRITE]), 1000); + host_reads = part_stat_read(ns->bdev, ios[READ]); + data_units_read = + DIV_ROUND_UP(part_stat_read(ns->bdev, sectors[READ]), 1000); + host_writes = part_stat_read(ns->bdev, ios[WRITE]); + data_units_written = + DIV_ROUND_UP(part_stat_read(ns->bdev, sectors[WRITE]), 1000); put_unaligned_le64(host_reads, &slog->host_reads[0]); put_unaligned_le64(data_units_read, &slog->data_units_read[0]); @@ -120,12 +120,12 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req, /* we don't have the right data for file backed ns */ if (!ns->bdev) continue; - host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]); + host_reads += part_stat_read(ns->bdev, ios[READ]); data_units_read += DIV_ROUND_UP( - part_stat_read(ns->bdev->bd_part, sectors[READ]), 1000); - host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]); + part_stat_read(ns->bdev, sectors[READ]), 1000); + host_writes += part_stat_read(ns->bdev, ios[WRITE]); data_units_written += DIV_ROUND_UP( - part_stat_read(ns->bdev->bd_part, sectors[WRITE]), 1000); + part_stat_read(ns->bdev, sectors[WRITE]), 1000); } put_unaligned_le64(host_reads, &slog->host_reads[0]); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6633b20224d5..c303a0ff0b17 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4048,9 +4048,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_sb = sb; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sb_block = sb_block; - if (sb->s_bdev->bd_part) - sbi->s_sectors_written_start = - part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]); + sbi->s_sectors_written_start = + part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); /* Cleanup superblock name */ strreplace(sb->s_id, '/', '!'); @@ -5509,15 +5508,10 @@ static int ext4_commit_super(struct super_block *sb, int sync) */ if (!(sb->s_flags & SB_RDONLY)) ext4_update_tstamp(es, s_wtime); - if (sb->s_bdev->bd_part) - es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1)); - else - es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); + es->s_kbytes_written = + cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - + EXT4_SB(sb)->s_sectors_written_start) >> 1)); if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) ext4_free_blocks_count_set(es, EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 4e27fe6ed3ae..075aa3a19ff5 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -62,11 +62,8 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); return snprintf(buf, PAGE_SIZE, "%lu\n", - (part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]) - + (part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - sbi->s_sectors_written_start) >> 1); } @@ -74,12 +71,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)(sbi->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]) - + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - EXT4_SB(sb)->s_sectors_written_start) >> 1))); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cb700d797296..49681a8d2b14 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1675,7 +1675,7 @@ static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi) * and the return value is in kbytes. s is of struct f2fs_sb_info. */ #define BD_PART_WRITTEN(s) \ -(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) - \ + (((u64)part_stat_read((s)->sb->s_bdev, sectors[STAT_WRITE]) - \ (s)->sectors_written_start) >> 1) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d4e7fab352ba..af9f449da64b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3700,10 +3700,8 @@ try_onemore: } /* For write statistics */ - if (sb->s_bdev->bd_part) - sbi->sectors_written_start = - (u64)part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]); + sbi->sectors_written_start = + (u64)part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 619adea57098..1d4be1fc6007 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -191,7 +191,7 @@ struct request { }; struct gendisk *rq_disk; - struct hd_struct *part; + struct block_device *part; #ifdef CONFIG_BLK_RQ_ALLOC_TIME /* Time that the first bio started allocating this request. */ u64 alloc_time_ns; @@ -1943,9 +1943,9 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); -unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, - struct bio *bio); -void part_end_io_acct(struct hd_struct *part, struct bio *bio, +unsigned long part_start_io_acct(struct gendisk *disk, + struct block_device **part, struct bio *bio); +void part_end_io_acct(struct block_device *part, struct bio *bio, unsigned long start_time); /** diff --git a/include/linux/genhd.h b/include/linux/genhd.h index df7319da013c..fe6fee77e2b9 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -131,8 +131,8 @@ enum { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct __rcu *last_lookup; - struct hd_struct __rcu *part[]; + struct block_device __rcu *last_lookup; + struct block_device __rcu *part[]; }; struct disk_events; diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 680de036691e..d2558121d48c 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -25,26 +25,26 @@ struct disk_stats { #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->bdev->bd_stats, (cpu))->field) + (per_cpu_ptr((part)->bd_stats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ - typeof((part)->bdev->bd_stats->field) res = 0; \ + typeof((part)->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->bdev->bd_stats, _cpu)->field; \ + res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \ res; \ }) -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void part_stat_set_all(struct block_device *part, int value) { int i; for_each_possible_cpu(i) - memset(per_cpu_ptr(part->bdev->bd_stats, i), value, + memset(per_cpu_ptr(part->bd_stats, i), value, sizeof(struct disk_stats)); } @@ -54,13 +54,12 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ - __this_cpu_add((part)->bdev->bd_stats->field, addnd) + __this_cpu_add((part)->bd_stats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ - if ((part)->partno) \ - __part_stat_add(part_to_disk((part))->part0->bd_part, \ - field, addnd); \ + if ((part)->bd_partno) \ + __part_stat_add(bdev_whole(part), field, addnd); \ } while (0) #define part_stat_dec(part, field) \ -- cgit v1.2.3 From 41e5c81984eac8ce87f2b4f57fec0bd90a049b2b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:37:14 +0100 Subject: block: remove the partno field from struct hd_struct Just use the bd_partno field in struct block_device everywhere. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/genhd.c | 12 ++++++------ block/partitions/core.c | 9 ++++----- include/linux/genhd.h | 1 - init/do_mounts.c | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index ed06466b305d..b7e39b41a275 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -589,8 +589,8 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) int idx; /* in consecutive minor range? */ - if (part->partno < disk->minors) { - *devt = MKDEV(disk->major, disk->first_minor + part->partno); + if (part->bdev->bd_partno < disk->minors) { + *devt = MKDEV(disk->major, disk->first_minor + part->bdev->bd_partno); return 0; } @@ -864,7 +864,7 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { - invalidate_partition(disk, part->partno); + invalidate_partition(disk, part->bdev->bd_partno); delete_partition(part); } disk_part_iter_exit(&piter); @@ -1008,7 +1008,7 @@ void __init printk_all_partitions(void) printk("%s%s %10llu %s %s", is_part0 ? "" : " ", bdevt_str(part_devt(part), devt_buf), bdev_nr_sectors(part->bdev) >> 1, - disk_name(disk, part->partno, name_buf), + disk_name(disk, part->bdev->bd_partno, name_buf), part->bdev->bd_meta_info ? part->bdev->bd_meta_info->uuid : ""); if (is_part0) { @@ -1102,7 +1102,7 @@ static int show_partition(struct seq_file *seqf, void *v) seq_printf(seqf, "%4d %7d %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), bdev_nr_sectors(part->bdev) >> 1, - disk_name(sgp, part->partno, buf)); + disk_name(sgp, part->bdev->bd_partno, buf)); disk_part_iter_exit(&piter); return 0; @@ -1525,7 +1525,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) "%lu %u" "\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), - disk_name(gp, hd->partno, buf), + disk_name(gp, hd->bdev->bd_partno, buf), stat.ios[STAT_READ], stat.merges[STAT_READ], stat.sectors[STAT_READ], diff --git a/block/partitions/core.c b/block/partitions/core.c index c2f6721633b8..6db9ca8b722d 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -184,7 +184,7 @@ static ssize_t part_partition_show(struct device *dev, { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%d\n", p->partno); + return sprintf(buf, "%d\n", p->bdev->bd_partno); } static ssize_t part_start_show(struct device *dev, @@ -274,7 +274,7 @@ static int part_uevent(struct device *dev, struct kobj_uevent_env *env) { struct hd_struct *part = dev_to_part(dev); - add_uevent_var(env, "PARTN=%u", part->partno); + add_uevent_var(env, "PARTN=%u", part->bdev->bd_partno); if (part->bdev->bd_meta_info && part->bdev->bd_meta_info->volname[0]) add_uevent_var(env, "PARTNAME=%s", part->bdev->bd_meta_info->volname); @@ -298,7 +298,7 @@ void delete_partition(struct hd_struct *part) struct disk_part_tbl *ptbl = rcu_dereference_protected(disk->part_tbl, 1); - rcu_assign_pointer(ptbl->part[part->partno], NULL); + rcu_assign_pointer(ptbl->part[part->bdev->bd_partno], NULL); rcu_assign_pointer(ptbl->last_lookup, NULL); kobject_put(part->bdev->bd_holder_dir); @@ -372,7 +372,6 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, bdev->bd_start_sect = start; bdev_set_nr_sectors(bdev, len); - p->partno = partno; bdev->bd_read_only = get_disk_ro(disk); if (info) { @@ -445,7 +444,7 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start, disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) { - if (part->partno == skip_partno || + if (part->bdev->bd_partno == skip_partno || start >= part->bdev->bd_start_sect + bdev_nr_sectors(part->bdev) || start + length <= part->bdev->bd_start_sect) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fe6fee77e2b9..3c13d4708e3f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -54,7 +54,6 @@ struct partition_meta_info { struct hd_struct { struct block_device *bdev; struct device __dev; - int partno; }; /** diff --git a/init/do_mounts.c b/init/do_mounts.c index 368ccb718501..86bef93e72eb 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -136,7 +136,7 @@ static dev_t devt_from_partuuid(const char *uuid_str) struct hd_struct *part; part = disk_get_part(dev_to_disk(dev), - dev_to_part(dev)->partno + offset); + dev_to_part(dev)->bdev->bd_partno + offset); if (part) { devt = part_devt(part); put_device(part_to_dev(part)); -- cgit v1.2.3 From ad1eaa5344b293552b6ba43f5709c76a9aa14d17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:52:59 +0100 Subject: block: switch disk_part_iter_* to use a struct block_device Switch the partition iter infrastructure to iterate over block_device references instead of hd_struct ones mostly used to get at the block_device. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/genhd.c | 59 ++++++++++++++++++++++++----------------------- block/partitions/core.c | 13 +++++------ drivers/s390/block/dasd.c | 8 +++---- include/linux/genhd.h | 4 ++-- 4 files changed, 42 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 452f7c646e02..2d34dd2da4e9 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -244,7 +244,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_init); * CONTEXT: * Don't care. */ -struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) +struct block_device *disk_part_iter_next(struct disk_part_iter *piter) { struct disk_part_tbl *ptbl; int inc, end; @@ -282,8 +282,9 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) piter->idx == 0)) continue; - get_device(part_to_dev(part->bd_part)); - piter->part = part->bd_part; + piter->part = bdgrab(part); + if (!piter->part) + continue; piter->idx += inc; break; } @@ -305,7 +306,8 @@ EXPORT_SYMBOL_GPL(disk_part_iter_next); */ void disk_part_iter_exit(struct disk_part_iter *piter) { - disk_put_part(piter->part); + if (piter->part) + bdput(piter->part); piter->part = NULL; } EXPORT_SYMBOL_GPL(disk_part_iter_exit); @@ -346,7 +348,6 @@ struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) for (i = 1; i < ptbl->len; i++) { part = rcu_dereference(ptbl->part[i]); - if (part && sector_in_part(part, sector)) { rcu_assign_pointer(ptbl->last_lookup, part); goto out_unlock; @@ -647,7 +648,7 @@ static void register_disk(struct device *parent, struct gendisk *disk, { struct device *ddev = disk_to_dev(disk); struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; int err; ddev->parent = parent; @@ -697,7 +698,7 @@ static void register_disk(struct device *parent, struct gendisk *disk, /* announce possible partitions */ disk_part_iter_init(&piter, disk, 0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); + kobject_uevent(bdev_kobj(part), KOBJ_ADD); disk_part_iter_exit(&piter); if (disk->queue->backing_dev_info->dev) { @@ -837,7 +838,7 @@ static void invalidate_partition(struct block_device *bdev) void del_gendisk(struct gendisk *disk) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; might_sleep(); @@ -857,8 +858,8 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { - invalidate_partition(part->bdev); - delete_partition(part); + invalidate_partition(part); + delete_partition(part->bd_part); } disk_part_iter_exit(&piter); @@ -977,7 +978,7 @@ void __init printk_all_partitions(void) while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; char name_buf[BDEVNAME_SIZE]; char devt_buf[BDEVT_SIZE]; @@ -996,14 +997,14 @@ void __init printk_all_partitions(void) */ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) { - bool is_part0 = part == disk->part0->bd_part; + bool is_part0 = part == disk->part0; printk("%s%s %10llu %s %s", is_part0 ? "" : " ", - bdevt_str(part_devt(part), devt_buf), - bdev_nr_sectors(part->bdev) >> 1, - disk_name(disk, part->bdev->bd_partno, name_buf), - part->bdev->bd_meta_info ? - part->bdev->bd_meta_info->uuid : ""); + bdevt_str(part->bd_dev, devt_buf), + bdev_nr_sectors(part) >> 1, + disk_name(disk, part->bd_partno, name_buf), + part->bd_meta_info ? + part->bd_meta_info->uuid : ""); if (is_part0) { if (dev->parent && dev->parent->driver) printk(" driver: %s\n", @@ -1079,7 +1080,7 @@ static int show_partition(struct seq_file *seqf, void *v) { struct gendisk *sgp = v; struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ @@ -1093,9 +1094,9 @@ static int show_partition(struct seq_file *seqf, void *v) disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) seq_printf(seqf, "%4d %7d %10llu %s\n", - MAJOR(part_devt(part)), MINOR(part_devt(part)), - bdev_nr_sectors(part->bdev) >> 1, - disk_name(sgp, part->bdev->bd_partno, buf)); + MAJOR(part->bd_dev), MINOR(part->bd_dev), + bdev_nr_sectors(part) >> 1, + disk_name(sgp, part->bd_partno, buf)); disk_part_iter_exit(&piter); return 0; @@ -1489,7 +1490,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) { struct gendisk *gp = v; struct disk_part_iter piter; - struct hd_struct *hd; + struct block_device *hd; char buf[BDEVNAME_SIZE]; unsigned int inflight; struct disk_stats stat; @@ -1504,11 +1505,11 @@ static int diskstats_show(struct seq_file *seqf, void *v) disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { - part_stat_read_all(hd, &stat); + part_stat_read_all(hd->bd_part, &stat); if (queue_is_mq(gp->queue)) - inflight = blk_mq_in_flight(gp->queue, hd->bdev); + inflight = blk_mq_in_flight(gp->queue, hd); else - inflight = part_in_flight(hd->bdev); + inflight = part_in_flight(hd); seq_printf(seqf, "%4d %7d %s " "%lu %lu %lu %u " @@ -1517,8 +1518,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) "%lu %lu %lu %u " "%lu %u" "\n", - MAJOR(part_devt(hd)), MINOR(part_devt(hd)), - disk_name(gp, hd->bdev->bd_partno, buf), + MAJOR(hd->bd_dev), MINOR(hd->bd_dev), + disk_name(gp, hd->bd_partno, buf), stat.ios[STAT_READ], stat.merges[STAT_READ], stat.sectors[STAT_READ], @@ -1673,7 +1674,7 @@ static void set_disk_ro_uevent(struct gendisk *gd, int ro) void set_disk_ro(struct gendisk *disk, int flag) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; if (disk->part0->bd_read_only != flag) { set_disk_ro_uevent(disk, flag); @@ -1682,7 +1683,7 @@ void set_disk_ro(struct gendisk *disk, int flag) disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) - part->bdev->bd_read_only = flag; + part->bd_read_only = flag; disk_part_iter_exit(&piter); } diff --git a/block/partitions/core.c b/block/partitions/core.c index 3d8243334c7c..4cb6df175f90 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -439,15 +439,14 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start, sector_t length, int skip_partno) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; bool overlap = false; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) { - if (part->bdev->bd_partno == skip_partno || - start >= part->bdev->bd_start_sect + - bdev_nr_sectors(part->bdev) || - start + length <= part->bdev->bd_start_sect) + if (part->bd_partno == skip_partno || + start >= part->bd_start_sect + bdev_nr_sectors(part) || + start + length <= part->bd_start_sect) continue; overlap = true; break; @@ -568,7 +567,7 @@ static bool disk_unlock_native_capacity(struct gendisk *disk) int blk_drop_partitions(struct block_device *bdev) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; if (bdev->bd_part_count) return -EBUSY; @@ -578,7 +577,7 @@ int blk_drop_partitions(struct block_device *bdev) disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) - delete_partition(part); + delete_partition(part->bd_part); disk_part_iter_exit(&piter); return 0; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index db24e04ee978..1825fa8d05a7 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -432,7 +432,7 @@ dasd_state_ready_to_online(struct dasd_device * device) { struct gendisk *disk; struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; device->state = DASD_STATE_ONLINE; if (device->block) { @@ -445,7 +445,7 @@ dasd_state_ready_to_online(struct dasd_device * device) disk = device->block->bdev->bd_disk; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); + kobject_uevent(bdev_kobj(part), KOBJ_CHANGE); disk_part_iter_exit(&piter); } return 0; @@ -459,7 +459,7 @@ static int dasd_state_online_to_ready(struct dasd_device *device) int rc; struct gendisk *disk; struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; if (device->discipline->online_to_ready) { rc = device->discipline->online_to_ready(device); @@ -472,7 +472,7 @@ static int dasd_state_online_to_ready(struct dasd_device *device) disk = device->block->bdev->bd_disk; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); + kobject_uevent(bdev_kobj(part), KOBJ_CHANGE); disk_part_iter_exit(&piter); } return 0; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3c13d4708e3f..cd23c80265b2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -244,14 +244,14 @@ static inline void disk_put_part(struct hd_struct *part) struct disk_part_iter { struct gendisk *disk; - struct hd_struct *part; + struct block_device *part; int idx; unsigned int flags; }; extern void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, unsigned int flags); -extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); +struct block_device *disk_part_iter_next(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter); extern bool disk_has_partitions(struct gendisk *disk); -- cgit v1.2.3 From 0d02129e76edf91cf04fabf1efbc3a9a1f1d729a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Nov 2020 16:43:51 +0100 Subject: block: merge struct block_device and struct hd_struct Instead of having two structures that represent each block device with different life time rules, merge them into a single one. This also greatly simplifies the reference counting rules, as we can use the inode reference count as the main reference count for the new struct block_device, with the device model reference front ending it for device model interaction. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 8 ++-- block/blk.h | 2 +- block/genhd.c | 90 +++++++++++------------------------ block/partitions/core.c | 116 ++++++++++++++++++---------------------------- fs/block_dev.c | 9 ---- include/linux/blk_types.h | 8 +++- include/linux/blkdev.h | 1 - include/linux/genhd.h | 40 ++++------------ init/do_mounts.c | 21 ++++----- kernel/trace/blktrace.c | 43 ++++------------- 10 files changed, 108 insertions(+), 230 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 79aa96240cec..031114d454a6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -820,9 +820,9 @@ static void blkcg_fill_root_iostats(void) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { - struct gendisk *disk = dev_to_disk(dev); - struct hd_struct *part = disk_get_part(disk, 0); - struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue); + struct block_device *bdev = dev_to_bdev(dev); + struct blkcg_gq *blkg = + blk_queue_root_blkg(bdev->bd_disk->queue); struct blkg_iostat tmp; int cpu; @@ -830,7 +830,7 @@ static void blkcg_fill_root_iostats(void) for_each_possible_cpu(cpu) { struct disk_stats *cpu_dkstats; - cpu_dkstats = per_cpu_ptr(part->bdev->bd_stats, cpu); + cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); tmp.ios[BLKG_IOSTAT_READ] += cpu_dkstats->ios[STAT_READ]; tmp.ios[BLKG_IOSTAT_WRITE] += diff --git a/block/blk.h b/block/blk.h index 9657c6da7c77..98f0b1ae2641 100644 --- a/block/blk.h +++ b/block/blk.h @@ -356,7 +356,7 @@ char *disk_name(struct gendisk *hd, int partno, char *buf); #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -void delete_partition(struct hd_struct *part); +void delete_partition(struct block_device *part); int bdev_add_partition(struct block_device *bdev, int partno, sector_t start, sector_t length); int bdev_del_partition(struct block_device *bdev, int partno); diff --git a/block/genhd.c b/block/genhd.c index 2d34dd2da4e9..0fabfc90b8e4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -106,13 +106,14 @@ const char *bdevname(struct block_device *bdev, char *buf) } EXPORT_SYMBOL(bdevname); -static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) +static void part_stat_read_all(struct block_device *part, + struct disk_stats *stat) { int cpu; memset(stat, 0, sizeof(struct disk_stats)); for_each_possible_cpu(cpu) { - struct disk_stats *ptr = per_cpu_ptr(part->bdev->bd_stats, cpu); + struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu); int group; for (group = 0; group < NR_STAT_GROUPS; group++) { @@ -167,39 +168,6 @@ struct block_device *__disk_get_part(struct gendisk *disk, int partno) return rcu_dereference(ptbl->part[partno]); } -/** - * disk_get_part - get partition - * @disk: disk to look partition from - * @partno: partition number - * - * Look for partition @partno from @disk. If found, increment - * reference count and return it. - * - * CONTEXT: - * Don't care. - * - * RETURNS: - * Pointer to the found partition on success, NULL if not found. - */ -struct hd_struct *disk_get_part(struct gendisk *disk, int partno) -{ - struct block_device *bdev; - struct hd_struct *part; - - rcu_read_lock(); - bdev = __disk_get_part(disk, partno); - if (!bdev) - goto fail; - part = bdev->bd_part; - if (!kobject_get_unless_zero(&part_to_dev(part)->kobj)) - goto fail; - rcu_read_unlock(); - return part; -fail: - rcu_read_unlock(); - return NULL; -} - /** * disk_part_iter_init - initialize partition iterator * @piter: iterator to initialize @@ -859,7 +827,7 @@ void del_gendisk(struct gendisk *disk) DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { invalidate_partition(part); - delete_partition(part->bd_part); + delete_partition(part); } disk_part_iter_exit(&piter); @@ -952,13 +920,13 @@ void blk_request_module(dev_t devt) */ struct block_device *bdget_disk(struct gendisk *disk, int partno) { - struct hd_struct *part; struct block_device *bdev = NULL; - part = disk_get_part(disk, partno); - if (part) - bdev = bdget_part(part); - disk_put_part(part); + rcu_read_lock(); + bdev = __disk_get_part(disk, partno); + if (bdev && !bdgrab(bdev)) + bdev = NULL; + rcu_read_unlock(); return bdev; } @@ -1175,24 +1143,22 @@ static ssize_t disk_ro_show(struct device *dev, ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%llu\n", bdev_nr_sectors(p->bdev)); + return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev))); } ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - struct request_queue *q = part_to_disk(p)->queue; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev->bd_disk->queue; struct disk_stats stat; unsigned int inflight; - part_stat_read_all(p, &stat); + part_stat_read_all(bdev, &stat); if (queue_is_mq(q)) - inflight = blk_mq_in_flight(q, p->bdev); + inflight = blk_mq_in_flight(q, bdev); else - inflight = part_in_flight(p->bdev); + inflight = part_in_flight(bdev); return sprintf(buf, "%8lu %8lu %8llu %8u " @@ -1227,14 +1193,14 @@ ssize_t part_stat_show(struct device *dev, ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - struct request_queue *q = part_to_disk(p)->queue; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev->bd_disk->queue; unsigned int inflight[2]; if (queue_is_mq(q)) - blk_mq_in_flight_rw(q, p->bdev, inflight); + blk_mq_in_flight_rw(q, bdev, inflight); else - part_in_flight_rw(p->bdev, inflight); + part_in_flight_rw(bdev, inflight); return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); } @@ -1282,20 +1248,17 @@ static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store); ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%d\n", p->bdev->bd_make_it_fail); + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail); } ssize_t part_fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct hd_struct *p = dev_to_part(dev); int i; if (count > 0 && sscanf(buf, "%d", &i) > 0) - p->bdev->bd_make_it_fail = i; + dev_to_bdev(dev)->bd_make_it_fail = i; return count; } @@ -1505,7 +1468,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { - part_stat_read_all(hd->bd_part, &stat); + part_stat_read_all(hd, &stat); if (queue_is_mq(gp->queue)) inflight = blk_mq_in_flight(gp->queue, hd); else @@ -1577,7 +1540,7 @@ dev_t blk_lookup_devt(const char *name, int partno) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); - struct hd_struct *part; + struct block_device *part; if (strcmp(dev_name(dev), name)) continue; @@ -1590,13 +1553,12 @@ dev_t blk_lookup_devt(const char *name, int partno) MINOR(dev->devt) + partno); break; } - part = disk_get_part(disk, partno); + part = bdget_disk(disk, partno); if (part) { - devt = part_devt(part); - disk_put_part(part); + devt = part->bd_dev; + bdput(part); break; } - disk_put_part(part); } class_dev_iter_exit(&iter); return devt; diff --git a/block/partitions/core.c b/block/partitions/core.c index 4cb6df175f90..deca253583bd 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -182,44 +182,39 @@ static struct parsed_partitions *check_partition(struct gendisk *hd, static ssize_t part_partition_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%d\n", p->bdev->bd_partno); + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_partno); } static ssize_t part_start_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%llu\n", p->bdev->bd_start_sect); + return sprintf(buf, "%llu\n", dev_to_bdev(dev)->bd_start_sect); } static ssize_t part_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%d\n", p->bdev->bd_read_only); + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_read_only); } static ssize_t part_alignment_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); + struct block_device *bdev = dev_to_bdev(dev); return sprintf(buf, "%u\n", - queue_limit_alignment_offset(&part_to_disk(p)->queue->limits, - p->bdev->bd_start_sect)); + queue_limit_alignment_offset(&bdev->bd_disk->queue->limits, + bdev->bd_start_sect)); } static ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); + struct block_device *bdev = dev_to_bdev(dev); return sprintf(buf, "%u\n", - queue_limit_discard_alignment(&part_to_disk(p)->queue->limits, - p->bdev->bd_start_sect)); + queue_limit_discard_alignment(&bdev->bd_disk->queue->limits, + bdev->bd_start_sect)); } static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); @@ -264,20 +259,17 @@ static const struct attribute_group *part_attr_groups[] = { static void part_release(struct device *dev) { - struct hd_struct *p = dev_to_part(dev); - blk_free_devt(dev->devt); - bdput(p->bdev); + bdput(dev_to_bdev(dev)); } static int part_uevent(struct device *dev, struct kobj_uevent_env *env) { - struct hd_struct *part = dev_to_part(dev); + struct block_device *part = dev_to_bdev(dev); - add_uevent_var(env, "PARTN=%u", part->bdev->bd_partno); - if (part->bdev->bd_meta_info && part->bdev->bd_meta_info->volname[0]) - add_uevent_var(env, "PARTNAME=%s", - part->bdev->bd_meta_info->volname); + add_uevent_var(env, "PARTN=%u", part->bd_partno); + if (part->bd_meta_info && part->bd_meta_info->volname[0]) + add_uevent_var(env, "PARTNAME=%s", part->bd_meta_info->volname); return 0; } @@ -292,25 +284,25 @@ struct device_type part_type = { * Must be called either with bd_mutex held, before a disk can be opened or * after all disk users are gone. */ -void delete_partition(struct hd_struct *part) +void delete_partition(struct block_device *part) { - struct gendisk *disk = part_to_disk(part); + struct gendisk *disk = part->bd_disk; struct disk_part_tbl *ptbl = rcu_dereference_protected(disk->part_tbl, 1); - rcu_assign_pointer(ptbl->part[part->bdev->bd_partno], NULL); + rcu_assign_pointer(ptbl->part[part->bd_partno], NULL); rcu_assign_pointer(ptbl->last_lookup, NULL); - kobject_put(part->bdev->bd_holder_dir); - device_del(part_to_dev(part)); + kobject_put(part->bd_holder_dir); + device_del(&part->bd_device); /* * Remove the block device from the inode hash, so that it cannot be * looked up any more even when openers still hold references. */ - remove_inode_hash(part->bdev->bd_inode); + remove_inode_hash(part->bd_inode); - put_device(part_to_dev(part)); + put_device(&part->bd_device); } static ssize_t whole_disk_show(struct device *dev, @@ -324,11 +316,10 @@ static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL); * Must be called either with bd_mutex held, before a disk can be opened or * after all disk users are gone. */ -static struct hd_struct *add_partition(struct gendisk *disk, int partno, +static struct block_device *add_partition(struct gendisk *disk, int partno, sector_t start, sector_t len, int flags, struct partition_meta_info *info) { - struct hd_struct *p; dev_t devt = MKDEV(0, 0); struct device *ddev = disk_to_dev(disk); struct device *pdev; @@ -367,9 +358,6 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, if (!bdev) return ERR_PTR(-ENOMEM); - p = bdev->bd_part; - pdev = part_to_dev(p); - bdev->bd_start_sect = start; bdev_set_nr_sectors(bdev, len); bdev->bd_read_only = get_disk_ro(disk); @@ -381,6 +369,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_bdput; } + pdev = &bdev->bd_device; dname = dev_name(ddev); if (isdigit(dname[strlen(dname) - 1])) dev_set_name(pdev, "%sp%d", dname, partno); @@ -422,7 +411,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, /* suppress uevent if the disk suppresses it */ if (!dev_get_uevent_suppress(ddev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); - return p; + return bdev; out_bdput: bdput(bdev); @@ -459,7 +448,7 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start, int bdev_add_partition(struct block_device *bdev, int partno, sector_t start, sector_t length) { - struct hd_struct *part; + struct block_device *part; mutex_lock(&bdev->bd_mutex); if (partition_overlaps(bdev->bd_disk, start, length, -1)) { @@ -475,76 +464,59 @@ int bdev_add_partition(struct block_device *bdev, int partno, int bdev_del_partition(struct block_device *bdev, int partno) { - struct block_device *bdevp; - struct hd_struct *part = NULL; + struct block_device *part; int ret; - bdevp = bdget_disk(bdev->bd_disk, partno); - if (!bdevp) + part = bdget_disk(bdev->bd_disk, partno); + if (!part) return -ENXIO; - mutex_lock(&bdevp->bd_mutex); + mutex_lock(&part->bd_mutex); mutex_lock_nested(&bdev->bd_mutex, 1); - ret = -ENXIO; - part = disk_get_part(bdev->bd_disk, partno); - if (!part) - goto out_unlock; - ret = -EBUSY; - if (bdevp->bd_openers) + if (part->bd_openers) goto out_unlock; - sync_blockdev(bdevp); - invalidate_bdev(bdevp); + sync_blockdev(part); + invalidate_bdev(part); delete_partition(part); ret = 0; out_unlock: mutex_unlock(&bdev->bd_mutex); - mutex_unlock(&bdevp->bd_mutex); - bdput(bdevp); - if (part) - disk_put_part(part); + mutex_unlock(&part->bd_mutex); + bdput(part); return ret; } int bdev_resize_partition(struct block_device *bdev, int partno, sector_t start, sector_t length) { - struct block_device *bdevp; - struct hd_struct *part; + struct block_device *part; int ret = 0; - part = disk_get_part(bdev->bd_disk, partno); + part = bdget_disk(bdev->bd_disk, partno); if (!part) return -ENXIO; - ret = -ENOMEM; - bdevp = bdget_part(part); - if (!bdevp) - goto out_put_part; - - mutex_lock(&bdevp->bd_mutex); + mutex_lock(&part->bd_mutex); mutex_lock_nested(&bdev->bd_mutex, 1); - ret = -EINVAL; - if (start != part->bdev->bd_start_sect) + if (start != part->bd_start_sect) goto out_unlock; ret = -EBUSY; if (partition_overlaps(bdev->bd_disk, start, length, partno)) goto out_unlock; - bdev_set_nr_sectors(bdevp, length); + bdev_set_nr_sectors(part, length); ret = 0; out_unlock: - mutex_unlock(&bdevp->bd_mutex); + mutex_unlock(&part->bd_mutex); mutex_unlock(&bdev->bd_mutex); - bdput(bdevp); -out_put_part: - disk_put_part(part); + bdput(part); return ret; } @@ -577,7 +549,7 @@ int blk_drop_partitions(struct block_device *bdev) disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) - delete_partition(part->bd_part); + delete_partition(part); disk_part_iter_exit(&piter); return 0; @@ -592,7 +564,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev, { sector_t size = state->parts[p].size; sector_t from = state->parts[p].from; - struct hd_struct *part; + struct block_device *part; if (!size) return true; @@ -632,7 +604,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev, if (IS_BUILTIN(CONFIG_BLK_DEV_MD) && (state->parts[p].flags & ADDPART_FLAG_RAID)) - md_autodetect_dev(part_to_dev(part)->devt); + md_autodetect_dev(part->bd_dev); return true; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 61cf33b6284f..a9905d8fd02b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -39,7 +39,6 @@ struct bdev_inode { struct block_device bdev; - struct hd_struct hd; struct inode vfs_inode; }; @@ -887,9 +886,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) iput(inode); return NULL; } - bdev->bd_part = &BDEV_I(inode)->hd; - memset(bdev->bd_part, 0, sizeof(*bdev->bd_part)); - bdev->bd_part->bdev = bdev; return bdev; } @@ -926,11 +922,6 @@ struct block_device *bdgrab(struct block_device *bdev) } EXPORT_SYMBOL(bdgrab); -struct block_device *bdget_part(struct hd_struct *part) -{ - return bdget(part_devt(part)); -} - long nr_blockdev_pages(void) { struct inode *inode; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6edea5c16259..866f74261b3b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -8,6 +8,7 @@ #include #include +#include #include struct bio_set; @@ -30,6 +31,7 @@ struct block_device { struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ void * bd_claiming; + struct device bd_device; void * bd_holder; int bd_holders; bool bd_write_holder; @@ -38,7 +40,6 @@ struct block_device { #endif struct kobject *bd_holder_dir; u8 bd_partno; - struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; @@ -61,8 +62,11 @@ struct block_device { #define bdev_whole(_bdev) \ ((_bdev)->bd_disk->part0) +#define dev_to_bdev(device) \ + container_of((device), struct block_device, bd_device) + #define bdev_kobj(_bdev) \ - (&part_to_dev((_bdev)->bd_part)->kobj) + (&((_bdev)->bd_device.kobj)) /* * Block error status values. See block/blk-core:blk_errors for the details. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1d4be1fc6007..17cedf0dc83d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1999,7 +1999,6 @@ void blkdev_put_no_open(struct block_device *bdev); struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); -struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index cd23c80265b2..809aaa32d53c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -19,12 +19,6 @@ #include #include -#define dev_to_part(device) container_of((device), struct hd_struct, __dev) -#define part_to_dev(part) (&((part)->__dev)) - -#define dev_to_disk(device) (dev_to_part(device)->bdev->bd_disk) -#define disk_to_dev(disk) (part_to_dev((disk)->part0->bd_part)) - extern const struct device_type disk_type; extern struct device_type part_type; extern struct class block_class; @@ -51,11 +45,6 @@ struct partition_meta_info { u8 volname[PARTITION_META_INFO_VOLNAMELTH]; }; -struct hd_struct { - struct block_device *bdev; - struct device __dev; -}; - /** * DOC: genhd capability flags * @@ -190,19 +179,21 @@ struct gendisk { struct lockdep_map lockdep_map; }; +/* + * The gendisk is refcounted by the part0 block_device, and the bd_device + * therein is also used for device model presentation in sysfs. + */ +#define dev_to_disk(device) \ + (dev_to_bdev(device)->bd_disk) +#define disk_to_dev(disk) \ + (&((disk)->part0->bd_device)) + #if IS_REACHABLE(CONFIG_CDROM) #define disk_to_cdi(disk) ((disk)->cdi) #else #define disk_to_cdi(disk) NULL #endif -static inline struct gendisk *part_to_disk(struct hd_struct *part) -{ - if (unlikely(!part)) - return NULL; - return part->bdev->bd_disk; -} - static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -221,19 +212,6 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } -static inline dev_t part_devt(struct hd_struct *part) -{ - return part_to_dev(part)->devt; -} - -extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); - -static inline void disk_put_part(struct hd_struct *part) -{ - if (likely(part)) - put_device(part_to_dev(part)); -} - /* * Smarter partition iterator without context limits. */ diff --git a/init/do_mounts.c b/init/do_mounts.c index 86bef93e72eb..a78e44ee6adb 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -76,11 +76,11 @@ struct uuidcmp { */ static int match_dev_by_uuid(struct device *dev, const void *data) { + struct block_device *bdev = dev_to_bdev(dev); const struct uuidcmp *cmp = data; - struct hd_struct *part = dev_to_part(dev); - if (!part->bdev->bd_meta_info || - strncasecmp(cmp->uuid, part->bdev->bd_meta_info->uuid, cmp->len)) + if (!bdev->bd_meta_info || + strncasecmp(cmp->uuid, bdev->bd_meta_info->uuid, cmp->len)) return 0; return 1; } @@ -133,13 +133,13 @@ static dev_t devt_from_partuuid(const char *uuid_str) * Attempt to find the requested partition by adding an offset * to the partition number found by UUID. */ - struct hd_struct *part; + struct block_device *part; - part = disk_get_part(dev_to_disk(dev), - dev_to_part(dev)->bdev->bd_partno + offset); + part = bdget_disk(dev_to_disk(dev), + dev_to_bdev(dev)->bd_partno + offset); if (part) { - devt = part_devt(part); - put_device(part_to_dev(part)); + devt = part->bd_dev; + bdput(part); } } else { devt = dev->devt; @@ -166,11 +166,10 @@ clear_root_wait: */ static int match_dev_by_label(struct device *dev, const void *data) { + struct block_device *bdev = dev_to_bdev(dev); const char *label = data; - struct hd_struct *part = dev_to_part(dev); - if (!part->bdev->bd_meta_info || - strcmp(label, part->bdev->bd_meta_info->volname)) + if (!bdev->bd_meta_info || strcmp(label, bdev->bd_meta_info->volname)) return 0; return 1; } diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 8a723a91ec5a..a482a37848bf 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1810,30 +1810,15 @@ static ssize_t blk_trace_mask2str(char *buf, int mask) return p - buf; } -static struct request_queue *blk_trace_get_queue(struct block_device *bdev) -{ - if (bdev->bd_disk == NULL) - return NULL; - - return bdev_get_queue(bdev); -} - static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct block_device *bdev = bdget_part(dev_to_part(dev)); - struct request_queue *q; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; ssize_t ret = -ENXIO; - if (bdev == NULL) - goto out; - - q = blk_trace_get_queue(bdev); - if (q == NULL) - goto out_bdput; - mutex_lock(&q->debugfs_mutex); bt = rcu_dereference_protected(q->blk_trace, @@ -1856,9 +1841,6 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, out_unlock_bdev: mutex_unlock(&q->debugfs_mutex); -out_bdput: - bdput(bdev); -out: return ret; } @@ -1866,8 +1848,8 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct block_device *bdev; - struct request_queue *q; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1883,17 +1865,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out; value = ret; } - } else if (kstrtoull(buf, 0, &value)) - goto out; - - ret = -ENXIO; - bdev = bdget_part(dev_to_part(dev)); - if (bdev == NULL) - goto out; - - q = blk_trace_get_queue(bdev); - if (q == NULL) - goto out_bdput; + } else { + if (kstrtoull(buf, 0, &value)) + goto out; + } mutex_lock(&q->debugfs_mutex); @@ -1931,8 +1906,6 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, out_unlock_bdev: mutex_unlock(&q->debugfs_mutex); -out_bdput: - bdput(bdev); out: return ret ? ret : count; } -- cgit v1.2.3 From 6b6667aa4d1e0866f00b62d35a9be3875c7551f8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 24 Nov 2020 17:58:12 +0000 Subject: block: optimise for_each_bvec() advance Because of how for_each_bvec() works it never advances across multiple entries at a time, so bvec_iter_advance() is an overkill. Add specialised bvec_iter_advance_single() that is faster. It also handles zero-len bvecs, so can kill bvec_iter_skip_zero_bvec(). text data bss dec hex filename before: 23977 805 0 24782 60ce lib/iov_iter.o before, bvec_iter_advance() w/o WARN_ONCE() 22886 600 0 23486 5bbe ./lib/iov_iter.o after: 21862 600 0 22462 57be lib/iov_iter.o Signed-off-by: Pavel Begunkov Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bvec.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 2efec10bf792..ff832e698efb 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -121,18 +121,28 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, return true; } -static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) +/* + * A simpler version of bvec_iter_advance(), @bytes should not span + * across multiple bvec entries, i.e. bytes <= bv[i->bi_idx].bv_len + */ +static inline void bvec_iter_advance_single(const struct bio_vec *bv, + struct bvec_iter *iter, unsigned int bytes) { - iter->bi_bvec_done = 0; - iter->bi_idx++; + unsigned int done = iter->bi_bvec_done + bytes; + + if (done == bv[iter->bi_idx].bv_len) { + done = 0; + iter->bi_idx++; + } + iter->bi_bvec_done = done; + iter->bi_size -= bytes; } #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ - (bvl).bv_len ? (void)bvec_iter_advance((bio_vec), &(iter), \ - (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) + bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ -- cgit v1.2.3 From 22b56c2964386ddced252be407150b22f85e209e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 24 Nov 2020 17:58:13 +0000 Subject: bio: optimise bvec iteration __bio_for_each_bvec(), __bio_for_each_segment() and bio_copy_data_iter() fall under conditions of bvec_iter_advance_single(), which is a faster and slimmer version of bvec_iter_advance(). Add bio_advance_iter_single() and convert them. Signed-off-by: Pavel Begunkov Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 4 ++-- include/linux/bio.h | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index ebb18136b86f..1f2cc1fbe283 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1212,8 +1212,8 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, flush_dcache_page(dst_bv.bv_page); - bio_advance_iter(src, src_iter, bytes); - bio_advance_iter(dst, dst_iter, bytes); + bio_advance_iter_single(src, src_iter, bytes); + bio_advance_iter_single(dst, dst_iter, bytes); } } EXPORT_SYMBOL(bio_copy_data_iter); diff --git a/include/linux/bio.h b/include/linux/bio.h index ecf67108f091..1edda614f7ce 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -148,11 +148,24 @@ static inline void bio_advance_iter(const struct bio *bio, /* TODO: It is reasonable to complete bio with error here. */ } +/* @bytes should be less or equal to bvec[i->bi_idx].bv_len */ +static inline void bio_advance_iter_single(const struct bio *bio, + struct bvec_iter *iter, + unsigned int bytes) +{ + iter->bi_sector += bytes >> 9; + + if (bio_no_advance_iter(bio)) + iter->bi_size -= bytes; + else + bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); +} + #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bio_iter_iovec((bio), (iter))), 1); \ - bio_advance_iter((bio), &(iter), (bvl).bv_len)) + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) @@ -161,7 +174,7 @@ static inline void bio_advance_iter(const struct bio *bio, for (iter = (start); \ (iter).bi_size && \ ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ - bio_advance_iter((bio), &(iter), (bvl).bv_len)) + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) /* iterate over multi-page bvec */ #define bio_for_each_bvec(bvl, bio, iter) \ -- cgit v1.2.3 From a54895fa057c67700270777f7661d8d3c7fda88a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Dec 2020 17:21:39 +0100 Subject: block: remove the request_queue to argument request based tracepoints The request_queue can trivially be derived from the request. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-merge.c | 2 +- block/blk-mq-sched.c | 2 +- block/blk-mq.c | 8 ++++---- drivers/md/dm-rq.c | 2 +- drivers/s390/scsi/zfcp_fsf.c | 3 +-- include/linux/blktrace_api.h | 5 ++--- include/trace/events/block.h | 30 ++++++++++++------------------ kernel/trace/blktrace.c | 44 +++++++++++++++++--------------------------- 8 files changed, 39 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/block/blk-merge.c b/block/blk-merge.c index 4071daa88a5e..7497d86fff38 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -799,7 +799,7 @@ static struct request *attempt_merge(struct request_queue *q, */ blk_account_io_merge_request(next); - trace_block_rq_merge(q, next); + trace_block_rq_merge(next); /* * ownership of bio passed from next to req, return 'next' for diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index d1eafe2c045c..deff4e826e23 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); void blk_mq_sched_request_inserted(struct request *rq) { - trace_block_rq_insert(rq->q, rq); + trace_block_rq_insert(rq); } EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); diff --git a/block/blk-mq.c b/block/blk-mq.c index 21e2b4b6b742..cf3916e2852f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -733,7 +733,7 @@ void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; - trace_block_rq_issue(q, rq); + trace_block_rq_issue(rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { rq->io_start_time_ns = ktime_get_ns(); @@ -760,7 +760,7 @@ static void __blk_mq_requeue_request(struct request *rq) blk_mq_put_driver_tag(rq); - trace_block_rq_requeue(q, rq); + trace_block_rq_requeue(rq); rq_qos_requeue(q, rq); if (blk_mq_request_started(rq)) { @@ -1821,7 +1821,7 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, lockdep_assert_held(&ctx->lock); - trace_block_rq_insert(hctx->queue, rq); + trace_block_rq_insert(rq); if (at_head) list_add(&rq->queuelist, &ctx->rq_lists[type]); @@ -1878,7 +1878,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, */ list_for_each_entry(rq, list, queuelist) { BUG_ON(rq->mq_ctx != ctx); - trace_block_rq_insert(hctx->queue, rq); + trace_block_rq_insert(rq); } spin_lock(&ctx->lock); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 729a72ec30cc..13b4385f4d5a 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -397,7 +397,7 @@ static int map_request(struct dm_rq_target_io *tio) } /* The target has remapped the I/O so dispatch it */ - trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), + trace_block_rq_remap(clone, disk_devt(dm_disk(md)), blk_rq_pos(rq)); ret = dm_dispatch_clone_request(clone, rq); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 6cb963a06777..37d450f46952 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -2359,8 +2359,7 @@ static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi) } } - blk_add_driver_data(scsi->request->q, scsi->request, &blktrc, - sizeof(blktrc)); + blk_add_driver_data(scsi->request, &blktrc, sizeof(blktrc)); } /** diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3b6ff5902edc..05556573b896 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -75,8 +75,7 @@ static inline bool blk_trace_note_message_enabled(struct request_queue *q) return ret; } -extern void blk_add_driver_data(struct request_queue *q, struct request *rq, - void *data, size_t len); +extern void blk_add_driver_data(struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg); @@ -90,7 +89,7 @@ extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) -# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_add_driver_data(rq, data, len) do {} while (0) # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 8fb89574d867..0d782663a005 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -64,7 +64,6 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer, /** * block_rq_requeue - place block IO request back on a queue - * @q: queue holding operation * @rq: block IO operation request * * The block operation request @rq is being placed back into queue @@ -73,9 +72,9 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer, */ TRACE_EVENT(block_rq_requeue, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) @@ -147,9 +146,9 @@ TRACE_EVENT(block_rq_complete, DECLARE_EVENT_CLASS(block_rq, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) @@ -181,7 +180,6 @@ DECLARE_EVENT_CLASS(block_rq, /** * block_rq_insert - insert block operation request into queue - * @q: target queue * @rq: block IO operation request * * Called immediately before block operation request @rq is inserted @@ -191,14 +189,13 @@ DECLARE_EVENT_CLASS(block_rq, */ DEFINE_EVENT(block_rq, block_rq_insert, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) + TP_ARGS(rq) ); /** * block_rq_issue - issue pending block IO request operation to device driver - * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is sent to a @@ -206,14 +203,13 @@ DEFINE_EVENT(block_rq, block_rq_insert, */ DEFINE_EVENT(block_rq, block_rq_issue, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) + TP_ARGS(rq) ); /** * block_rq_merge - merge request with another one in the elevator - * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is merged to another @@ -221,9 +217,9 @@ DEFINE_EVENT(block_rq, block_rq_issue, */ DEFINE_EVENT(block_rq, block_rq_merge, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) + TP_ARGS(rq) ); /** @@ -491,7 +487,6 @@ TRACE_EVENT(block_bio_remap, /** * block_rq_remap - map request for a block operation request - * @q: queue holding the operation * @rq: block IO operation request * @dev: device for the operation * @from: original sector for the operation @@ -502,10 +497,9 @@ TRACE_EVENT(block_bio_remap, */ TRACE_EVENT(block_rq_remap, - TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, - sector_t from), + TP_PROTO(struct request *rq, dev_t dev, sector_t from), - TP_ARGS(q, rq, dev, from), + TP_ARGS(rq, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 405637144a03..7839a78205c2 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -795,12 +795,12 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) #endif static u64 -blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) +blk_trace_request_get_cgid(struct request *rq) { if (!rq->bio) return 0; /* Use the first bio */ - return blk_trace_bio_get_cgid(q, rq->bio); + return blk_trace_bio_get_cgid(rq->q, rq->bio); } /* @@ -841,40 +841,35 @@ static void blk_add_trace_rq(struct request *rq, int error, rcu_read_unlock(); } -static void blk_add_trace_rq_insert(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_insert(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_issue(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_issue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_merge(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_merge(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_requeue(void *ignore, - struct request_queue *q, - struct request *rq) +static void blk_add_trace_rq_requeue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } static void blk_add_trace_rq_complete(void *ignore, struct request *rq, int error, unsigned int nr_bytes) { blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, - blk_trace_request_get_cgid(rq->q, rq)); + blk_trace_request_get_cgid(rq)); } /** @@ -1037,16 +1032,14 @@ static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev, * Add a trace for that action. * **/ -static void blk_add_trace_rq_remap(void *ignore, - struct request_queue *q, - struct request *rq, dev_t dev, +static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { struct blk_trace *bt; struct blk_io_trace_remap r; rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); + bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; @@ -1058,13 +1051,12 @@ static void blk_add_trace_rq_remap(void *ignore, __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq_data_dir(rq), 0, BLK_TA_REMAP, 0, - sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); + sizeof(r), &r, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } /** * blk_add_driver_data - Add binary message with driver-specific data - * @q: queue the io is for * @rq: io request * @data: driver-specific data * @len: length of driver-specific data @@ -1073,14 +1065,12 @@ static void blk_add_trace_rq_remap(void *ignore, * Some drivers might want to write driver-specific data per request. * **/ -void blk_add_driver_data(struct request_queue *q, - struct request *rq, - void *data, size_t len) +void blk_add_driver_data(struct request *rq, void *data, size_t len) { struct blk_trace *bt; rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); + bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; @@ -1088,7 +1078,7 @@ void blk_add_driver_data(struct request_queue *q, __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, BLK_TA_DRV_DATA, 0, len, data, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); -- cgit v1.2.3 From 661d4f55a79483aee4970a76e3bd9d4cdc74ac79 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 22 Nov 2020 15:35:46 +0000 Subject: sbitmap: remove swap_lock map->swap_lock protects map->cleared from concurrent modification, however sbitmap_deferred_clear() is already atomically drains it, so it's guaranteed to not loose bits on concurrent sbitmap_deferred_clear(). A one threaded tag heavy test on top of nullbk showed ~1.5% t-put increase, and 3% -> 1% cycle reduction of sbitmap_get() according to perf. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 5 ----- lib/sbitmap.c | 14 +++----------- 2 files changed, 3 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index e40d019c3d9d..74cc6384715e 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -32,11 +32,6 @@ struct sbitmap_word { * @cleared: word holding cleared bits */ unsigned long cleared ____cacheline_aligned_in_smp; - - /** - * @swap_lock: Held while swapping word <-> cleared - */ - spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** diff --git a/lib/sbitmap.c b/lib/sbitmap.c index c1c8a4e69325..4fd877048ba8 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -15,13 +15,9 @@ static inline bool sbitmap_deferred_clear(struct sbitmap_word *map) { unsigned long mask, val; - bool ret = false; - unsigned long flags; - spin_lock_irqsave(&map->swap_lock, flags); - - if (!map->cleared) - goto out_unlock; + if (!READ_ONCE(map->cleared)) + return false; /* * First get a stable cleared mask, setting the old mask to 0. @@ -35,10 +31,7 @@ static inline bool sbitmap_deferred_clear(struct sbitmap_word *map) val = map->word; } while (cmpxchg(&map->word, val, val & ~mask) != val); - ret = true; -out_unlock: - spin_unlock_irqrestore(&map->swap_lock, flags); - return ret; + return true; } int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, @@ -80,7 +73,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, for (i = 0; i < sb->map_nr; i++) { sb->map[i].depth = min(depth, bits_per_word); depth -= sb->map[i].depth; - spin_lock_init(&sb->map[i].swap_lock); } return 0; } -- cgit v1.2.3 From fb01a2932e81a1fb2273f87ff92dc8172b8880ee Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 3 Dec 2020 09:26:36 +0800 Subject: blk-mq: add new API of blk_mq_hctx_set_fq_lock_class flush_end_io() may be called recursively from some driver, such as nvme-loop, so lockdep may complain 'possible recursive locking'. Commit b3c6a5997541("block: Fix a lockdep complaint triggered by request queue flushing") tried to address this issue by assigning dynamically allocated per-flush-queue lock class. This solution adds synchronize_rcu() for each hctx's release handler, and causes horrible SCSI MQ probe delay(more than half an hour on megaraid sas). Add new API of blk_mq_hctx_set_fq_lock_class() for these drivers, so we just need to use driver specific lock class for avoiding the lockdep warning of 'possible recursive locking'. Tested-by: Kashyap Desai Reported-by: Qian Cai Cc: Sumit Saxena Cc: John Garry Cc: Kashyap Desai Cc: Bart Van Assche Cc: Hannes Reinecke Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-flush.c | 25 +++++++++++++++++++++++++ include/linux/blk-mq.h | 3 +++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/block/blk-flush.c b/block/blk-flush.c index 9507dcdd5881..bf51588762d8 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -490,3 +490,28 @@ void blk_free_flush_queue(struct blk_flush_queue *fq) kfree(fq->flush_rq); kfree(fq); } + +/* + * Allow driver to set its own lock class to fq->mq_flush_lock for + * avoiding lockdep complaint. + * + * flush_end_io() may be called recursively from some driver, such as + * nvme-loop, so lockdep may complain 'possible recursive locking' because + * all 'struct blk_flush_queue' instance share same mq_flush_lock lock class + * key. We need to assign different lock class for these driver's + * fq->mq_flush_lock for avoiding the lockdep warning. + * + * Use dynamically allocated lock class key for each 'blk_flush_queue' + * instance is over-kill, and more worse it introduces horrible boot delay + * issue because synchronize_rcu() is implied in lockdep_unregister_key which + * is called for each hctx release. SCSI probing may synchronously create and + * destroy lots of MQ request_queues for non-existent devices, and some robot + * test kernel always enable lockdep option. It is observed that more than half + * an hour is taken during SCSI MQ probe with per-fq lock class. + */ +void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, + struct lock_class_key *key) +{ + lockdep_set_class(&hctx->fq->mq_flush_lock, key); +} +EXPORT_SYMBOL_GPL(blk_mq_hctx_set_fq_lock_class); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 794b2a33a2c3..5f639240760e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -5,6 +5,7 @@ #include #include #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -594,5 +595,7 @@ static inline void blk_mq_cleanup_rq(struct request *rq) } blk_qc_t blk_mq_submit_bio(struct bio *bio); +void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, + struct lock_class_key *key); #endif -- cgit v1.2.3