diff options
| author | Peter Zijlstra <peterz@infradead.org> | 2024-07-04 16:00:24 +0200 |
|---|---|---|
| committer | Peter Zijlstra <peterz@infradead.org> | 2024-07-04 16:00:24 +0200 |
| commit | 0c8ea05e9b3d8e5287e2a968f2a2e744dfd31b99 (patch) | |
| tree | ba0443b74f063471c5d81d85e795e04e7e1dc79c /drivers/block | |
| parent | 0ca4da2412da05fb9dd0b5d90dcc8026219f0f29 (diff) | |
| parent | 34b3fc558b537bdf99644dcde539e151716f6331 (diff) | |
Merge branch 'tip/x86/cpu'
The Lunarlake patches rely on the new VFM stuff.
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Diffstat (limited to 'drivers/block')
| -rw-r--r-- | drivers/block/brd.c | 66 | ||||
| -rw-r--r-- | drivers/block/loop.c | 4 | ||||
| -rw-r--r-- | drivers/block/nbd.c | 41 | ||||
| -rw-r--r-- | drivers/block/null_blk/main.c | 85 | ||||
| -rw-r--r-- | drivers/block/null_blk/null_blk.h | 2 | ||||
| -rw-r--r-- | drivers/block/null_blk/trace.h | 7 | ||||
| -rw-r--r-- | drivers/block/null_blk/zoned.c | 358 | ||||
| -rw-r--r-- | drivers/block/pktcdvd.c | 7 | ||||
| -rw-r--r-- | drivers/block/rnbd/rnbd-srv-trace.h | 12 | ||||
| -rw-r--r-- | drivers/block/ublk_drv.c | 9 | ||||
| -rw-r--r-- | drivers/block/virtio_blk.c | 3 | ||||
| -rw-r--r-- | drivers/block/zram/zram_drv.c | 60 | ||||
| -rw-r--r-- | drivers/block/zram/zram_drv.h | 2 |
13 files changed, 368 insertions, 288 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index e322cef6596b..558d8e670566 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -29,10 +29,7 @@ /* * Each block ramdisk device has a xarray brd_pages of pages that stores - * the pages containing the block device's contents. A brd page's ->index is - * its offset in PAGE_SIZE units. This is similar to, but in no way connected - * with, the kernel's pagecache or buffer cache (which sit above our block - * device). + * the pages containing the block device's contents. */ struct brd_device { int brd_number; @@ -51,15 +48,7 @@ struct brd_device { */ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) { - pgoff_t idx; - struct page *page; - - idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */ - page = xa_load(&brd->brd_pages, idx); - - BUG_ON(page && page->index != idx); - - return page; + return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT); } /* @@ -67,8 +56,8 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) */ static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp) { - pgoff_t idx; - struct page *page, *cur; + pgoff_t idx = sector >> PAGE_SECTORS_SHIFT; + struct page *page; int ret = 0; page = brd_lookup_page(brd, sector); @@ -80,23 +69,16 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp) return -ENOMEM; xa_lock(&brd->brd_pages); - - idx = sector >> PAGE_SECTORS_SHIFT; - page->index = idx; - - cur = __xa_cmpxchg(&brd->brd_pages, idx, NULL, page, gfp); - - if (unlikely(cur)) { - __free_page(page); - ret = xa_err(cur); - if (!ret && (cur->index != idx)) - ret = -EIO; - } else { + ret = __xa_insert(&brd->brd_pages, idx, page, gfp); + if (!ret) brd->brd_nr_pages++; - } - xa_unlock(&brd->brd_pages); + if (ret < 0) { + __free_page(page); + if (ret == -EBUSY) + ret = 0; + } return ret; } @@ -240,6 +222,23 @@ out: return err; } +static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) +{ + sector_t aligned_sector = (sector + PAGE_SECTORS) & ~PAGE_SECTORS; + struct page *page; + + size -= (aligned_sector - sector) * SECTOR_SIZE; + xa_lock(&brd->brd_pages); + while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) { + page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT); + if (page) + __free_page(page); + aligned_sector += PAGE_SECTORS; + size -= PAGE_SIZE; + } + xa_unlock(&brd->brd_pages); +} + static void brd_submit_bio(struct bio *bio) { struct brd_device *brd = bio->bi_bdev->bd_disk->private_data; @@ -247,6 +246,12 @@ static void brd_submit_bio(struct bio *bio) struct bio_vec bvec; struct bvec_iter iter; + if (unlikely(op_is_discard(bio->bi_opf))) { + brd_do_discard(brd, sector, bio->bi_iter.bi_size); + bio_endio(bio); + return; + } + bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; int err; @@ -327,6 +332,9 @@ static int brd_alloc(int i) * is harmless) */ .physical_block_size = PAGE_SIZE, + .max_hw_discard_sectors = UINT_MAX, + .max_discard_segments = 1, + .discard_granularity = PAGE_SIZE, }; list_for_each_entry(brd, &brd_devices, brd_list) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 28a95fd366fe..93780f41646b 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -445,9 +445,9 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); if (rw == ITER_SOURCE) - ret = call_write_iter(file, &cmd->iocb, &iter); + ret = file->f_op->write_iter(&cmd->iocb, &iter); else - ret = call_read_iter(file, &cmd->iocb, &iter); + ret = file->f_op->read_iter(&cmd->iocb, &iter); lo_rw_aio_do_completion(cmd); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9d4ec9273bf9..22a79a62cc4e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -222,7 +222,7 @@ static ssize_t pid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); - struct nbd_device *nbd = (struct nbd_device *)disk->private_data; + struct nbd_device *nbd = disk->private_data; return sprintf(buf, "%d\n", nbd->pid); } @@ -236,7 +236,7 @@ static ssize_t backend_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); - struct nbd_device *nbd = (struct nbd_device *)disk->private_data; + struct nbd_device *nbd = disk->private_data; return sprintf(buf, "%s\n", nbd->backend ?: ""); } @@ -588,7 +588,10 @@ static inline int was_interrupted(int result) return result == -ERESTARTSYS || result == -EINTR; } -/* always call with the tx_lock held */ +/* + * Returns BLK_STS_RESOURCE if the caller should retry after a delay. Returns + * -EAGAIN if the caller should requeue @cmd. Returns -EIO if sending failed. + */ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); @@ -598,13 +601,15 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; struct iov_iter from; - unsigned long size = blk_rq_bytes(req); struct bio *bio; u64 handle; u32 type; u32 nbd_cmd_flags = 0; int sent = nsock->sent, skip = 0; + lockdep_assert_held(&cmd->lock); + lockdep_assert_held(&nsock->tx_lock); + iov_iter_kvec(&from, ITER_SOURCE, &iov, 1, sizeof(request)); type = req_to_nbd_cmd_type(req); @@ -644,7 +649,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) request.type = htonl(type | nbd_cmd_flags); if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); - request.len = htonl(size); + request.len = htonl(blk_rq_bytes(req)); } handle = nbd_cmd_handle(cmd); request.cookie = cpu_to_be64(handle); @@ -669,7 +674,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) nsock->sent = sent; } set_bit(NBD_CMD_REQUEUED, &cmd->flags); - return BLK_STS_RESOURCE; + return (__force int)BLK_STS_RESOURCE; } dev_err_ratelimited(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); @@ -710,7 +715,7 @@ send_pages: nsock->pending = req; nsock->sent = sent; set_bit(NBD_CMD_REQUEUED, &cmd->flags); - return BLK_STS_RESOURCE; + return (__force int)BLK_STS_RESOURCE; } dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", @@ -1007,7 +1012,7 @@ static int wait_for_reconnect(struct nbd_device *nbd) return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags); } -static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) +static blk_status_t nbd_handle_cmd(struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); struct nbd_device *nbd = cmd->nbd; @@ -1015,18 +1020,20 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) struct nbd_sock *nsock; int ret; + lockdep_assert_held(&cmd->lock); + config = nbd_get_config_unlocked(nbd); if (!config) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Socks array is empty\n"); - return -EINVAL; + return BLK_STS_IOERR; } if (index >= config->num_connections) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on invalid socket\n"); nbd_config_put(nbd); - return -EINVAL; + return BLK_STS_IOERR; } cmd->status = BLK_STS_OK; again: @@ -1049,7 +1056,7 @@ again: */ sock_shutdown(nbd); nbd_config_put(nbd); - return -EIO; + return BLK_STS_IOERR; } goto again; } @@ -1062,7 +1069,7 @@ again: blk_mq_start_request(req); if (unlikely(nsock->pending && nsock->pending != req)) { nbd_requeue_cmd(cmd); - ret = 0; + ret = BLK_STS_OK; goto out; } /* @@ -1081,19 +1088,19 @@ again: "Request send failed, requeueing\n"); nbd_mark_nsock_dead(nbd, nsock, 1); nbd_requeue_cmd(cmd); - ret = 0; + ret = BLK_STS_OK; } out: mutex_unlock(&nsock->tx_lock); nbd_config_put(nbd); - return ret; + return ret < 0 ? BLK_STS_IOERR : (__force blk_status_t)ret; } static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); - int ret; + blk_status_t ret; /* * Since we look at the bio's to send the request over the network we @@ -1113,10 +1120,6 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, * appropriate. */ ret = nbd_handle_cmd(cmd, hctx->queue_num); - if (ret < 0) - ret = BLK_STS_IOERR; - else if (!ret) - ret = BLK_STS_OK; mutex_unlock(&cmd->lock); return ret; diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index ed33cf7192d2..eb023d267369 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -225,6 +225,10 @@ static unsigned long g_cache_size; module_param_named(cache_size, g_cache_size, ulong, 0444); MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)"); +static bool g_fua = true; +module_param_named(fua, g_fua, bool, 0444); +MODULE_PARM_DESC(zoned, "Enable/disable FUA support when cache_size is used. Default: true"); + static unsigned int g_mbps; module_param_named(mbps, g_mbps, uint, 0444); MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)"); @@ -253,6 +257,11 @@ static unsigned int g_zone_max_active; module_param_named(zone_max_active, g_zone_max_active, uint, 0444); MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)"); +static int g_zone_append_max_sectors = INT_MAX; +module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444); +MODULE_PARM_DESC(zone_append_max_sectors, + "Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation"); + static struct nullb_device *null_alloc_dev(void); static void null_free_dev(struct nullb_device *dev); static void null_del_dev(struct nullb *nullb); @@ -404,13 +413,25 @@ static int nullb_update_nr_hw_queues(struct nullb_device *dev, static int nullb_apply_submit_queues(struct nullb_device *dev, unsigned int submit_queues) { - return nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues); + int ret; + + mutex_lock(&lock); + ret = nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues); + mutex_unlock(&lock); + + return ret; } static int nullb_apply_poll_queues(struct nullb_device *dev, unsigned int poll_queues) { - return nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues); + int ret; + + mutex_lock(&lock); + ret = nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues); + mutex_unlock(&lock); + + return ret; } NULLB_DEVICE_ATTR(size, ulong, NULL); @@ -436,10 +457,12 @@ NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL); NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL); NULLB_DEVICE_ATTR(zone_max_open, uint, NULL); NULLB_DEVICE_ATTR(zone_max_active, uint, NULL); +NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL); NULLB_DEVICE_ATTR(virt_boundary, bool, NULL); NULLB_DEVICE_ATTR(no_sched, bool, NULL); NULLB_DEVICE_ATTR(shared_tags, bool, NULL); NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL); +NULLB_DEVICE_ATTR(fua, bool, NULL); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -457,28 +480,31 @@ static ssize_t nullb_device_power_store(struct config_item *item, if (ret < 0) return ret; + ret = count; + mutex_lock(&lock); if (!dev->power && newp) { if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags)) - return count; + goto out; + ret = null_add_dev(dev); if (ret) { clear_bit(NULLB_DEV_FL_UP, &dev->flags); - return ret; + goto out; } set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); dev->power = newp; } else if (dev->power && !newp) { if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) { - mutex_lock(&lock); dev->power = newp; null_del_dev(dev->nullb); - mutex_unlock(&lock); } clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); } - return count; +out: + mutex_unlock(&lock); + return ret; } CONFIGFS_ATTR(nullb_device_, power); @@ -580,12 +606,14 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_zone_nr_conv, &nullb_device_attr_zone_max_open, &nullb_device_attr_zone_max_active, + &nullb_device_attr_zone_append_max_sectors, &nullb_device_attr_zone_readonly, &nullb_device_attr_zone_offline, &nullb_device_attr_virt_boundary, &nullb_device_attr_no_sched, &nullb_device_attr_shared_tags, &nullb_device_attr_shared_tag_bitmap, + &nullb_device_attr_fua, NULL, }; @@ -664,14 +692,14 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, - "badblocks,blocking,blocksize,cache_size," + "badblocks,blocking,blocksize,cache_size,fua," "completion_nsec,discard,home_node,hw_queue_depth," "irqmode,max_sectors,mbps,memory_backed,no_sched," "poll_queues,power,queue_mode,shared_tag_bitmap," "shared_tags,size,submit_queues,use_per_node_hctx," "virt_boundary,zoned,zone_capacity,zone_max_active," "zone_max_open,zone_nr_conv,zone_offline,zone_readonly," - "zone_size\n"); + "zone_size,zone_append_max_sectors\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -751,10 +779,13 @@ static struct nullb_device *null_alloc_dev(void) dev->zone_nr_conv = g_zone_nr_conv; dev->zone_max_open = g_zone_max_open; dev->zone_max_active = g_zone_max_active; + dev->zone_append_max_sectors = g_zone_append_max_sectors; dev->virt_boundary = g_virt_boundary; dev->no_sched = g_no_sched; dev->shared_tags = g_shared_tags; dev->shared_tag_bitmap = g_shared_tag_bitmap; + dev->fua = g_fua; + return dev; } @@ -1151,7 +1182,7 @@ blk_status_t null_handle_discard(struct nullb_device *dev, return BLK_STS_OK; } -static int null_handle_flush(struct nullb *nullb) +static blk_status_t null_handle_flush(struct nullb *nullb) { int err; @@ -1168,7 +1199,7 @@ static int null_handle_flush(struct nullb *nullb) WARN_ON(!radix_tree_empty(&nullb->dev->cache)); spin_unlock_irq(&nullb->lock); - return err; + return errno_to_blk_status(err); } static int null_transfer(struct nullb *nullb, struct page *page, @@ -1202,11 +1233,11 @@ static int null_transfer(struct nullb *nullb, struct page *page, return err; } -static int null_handle_rq(struct nullb_cmd *cmd) +static blk_status_t null_handle_rq(struct nullb_cmd *cmd) { struct request *rq = blk_mq_rq_from_pdu(cmd); struct nullb *nullb = cmd->nq->dev->nullb; - int err; + int err = 0; unsigned int len; sector_t sector = blk_rq_pos(rq); struct req_iterator iter; @@ -1218,15 +1249,13 @@ static int null_handle_rq(struct nullb_cmd *cmd) err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, op_is_write(req_op(rq)), sector, rq->cmd_flags & REQ_FUA); - if (err) { - spin_unlock_irq(&nullb->lock); - return err; - } + if (err) + break; sector += len >> SECTOR_SHIFT; } spin_unlock_irq(&nullb->lock); - return 0; + return errno_to_blk_status(err); } static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd) @@ -1273,8 +1302,8 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, if (op == REQ_OP_DISCARD) return null_handle_discard(dev, sector, nr_sectors); - return errno_to_blk_status(null_handle_rq(cmd)); + return null_handle_rq(cmd); } static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd) @@ -1343,7 +1372,7 @@ static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, blk_status_t sts; if (op == REQ_OP_FLUSH) { - cmd->error = errno_to_blk_status(null_handle_flush(nullb)); + cmd->error = null_handle_flush(nullb); goto out; } @@ -1912,21 +1941,18 @@ static int null_add_dev(struct nullb_device *dev) if (dev->cache_size > 0) { set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); - blk_queue_write_cache(nullb->q, true, true); + blk_queue_write_cache(nullb->q, true, dev->fua); } nullb->q->queuedata = nullb; blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); - mutex_lock(&lock); rv = ida_alloc(&nullb_indexes, GFP_KERNEL); - if (rv < 0) { - mutex_unlock(&lock); + if (rv < 0) goto out_cleanup_disk; - } + nullb->index = rv; dev->index = rv; - mutex_unlock(&lock); if (config_item_name(&dev->group.cg_item)) { /* Use configfs dir name as the device name */ @@ -1955,9 +1981,7 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_ida_free; - mutex_lock(&lock); list_add_tail(&nullb->list, &nullb_list); - mutex_unlock(&lock); pr_info("disk %s created\n", nullb->disk_name); @@ -2006,7 +2030,9 @@ static int null_create_dev(void) if (!dev) return -ENOMEM; + mutex_lock(&lock); ret = null_add_dev(dev); + mutex_unlock(&lock); if (ret) { null_free_dev(dev); return ret; @@ -2113,10 +2139,13 @@ static void __exit null_exit(void) if (tag_set.ops) blk_mq_free_tag_set(&tag_set); + + mutex_destroy(&lock); } module_init(null_init); module_exit(null_exit); MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>"); +MODULE_DESCRIPTION("multi queue aware block test driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 477b97746823..3234e6c85eed 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -82,6 +82,7 @@ struct nullb_device { unsigned int zone_nr_conv; /* number of conventional zones */ unsigned int zone_max_open; /* max number of open zones */ unsigned int zone_max_active; /* max number of active zones */ + unsigned int zone_append_max_sectors; /* Max sectors per zone append command */ unsigned int submit_queues; /* number of submission queues */ unsigned int prev_submit_queues; /* number of submission queues before change */ unsigned int poll_queues; /* number of IOPOLL submission queues */ @@ -104,6 +105,7 @@ struct nullb_device { bool no_sched; /* no IO scheduler for the device */ bool shared_tags; /* share tag set between devices for blk-mq */ bool shared_tag_bitmap; /* use hostwide shared tags */ + bool fua; /* Support FUA */ }; struct nullb { diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h index ef2d05d5f0df..82b8f6a5e5f0 100644 --- a/drivers/block/null_blk/trace.h +++ b/drivers/block/null_blk/trace.h @@ -36,7 +36,12 @@ TRACE_EVENT(nullb_zone_op, TP_ARGS(cmd, zone_no, zone_cond), TP_STRUCT__entry( __array(char, disk, DISK_NAME_LEN) - __field(enum req_op, op) + /* + * __field() uses is_signed_type(). is_signed_type() does not + * support bitwise types. Use __field_struct() instead because + * it does not use is_signed_type(). + */ + __field_struct(enum req_op, op) __field(unsigned int, zone_no) __field(unsigned int, zone_cond) ), diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 1689e2584104..5b5a63adacc1 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -9,6 +9,8 @@ #undef pr_fmt #define pr_fmt(fmt) "null_blk: " fmt +#define NULL_ZONE_INVALID_WP ((sector_t)-1) + static inline sector_t mb_to_sects(unsigned long mb) { return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT; @@ -19,18 +21,6 @@ static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) return sect >> ilog2(dev->zone_size_sects); } -static inline void null_lock_zone_res(struct nullb_device *dev) -{ - if (dev->need_zone_res_mgmt) - spin_lock_irq(&dev->zone_res_lock); -} - -static inline void null_unlock_zone_res(struct nullb_device *dev) -{ - if (dev->need_zone_res_mgmt) - spin_unlock_irq(&dev->zone_res_lock); -} - static inline void null_init_zone_lock(struct nullb_device *dev, struct nullb_zone *zone) { @@ -103,6 +93,11 @@ int null_init_zoned_dev(struct nullb_device *dev, dev->zone_nr_conv); } + dev->zone_append_max_sectors = + min(ALIGN_DOWN(dev->zone_append_max_sectors, + dev->blocksize >> SECTOR_SHIFT), + zone_capacity_sects); + /* Max active zones has to be < nbr of seq zones in order to be enforceable */ if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) { dev->zone_max_active = 0; @@ -154,7 +149,7 @@ int null_init_zoned_dev(struct nullb_device *dev, lim->zoned = true; lim->chunk_sectors = dev->zone_size_sects; - lim->max_zone_append_sectors = dev->zone_size_sects; + lim->max_zone_append_sectors = dev->zone_append_max_sectors; lim->max_open_zones = dev->zone_max_open; lim->max_active_zones = dev->zone_max_active; return 0; @@ -163,11 +158,16 @@ int null_init_zoned_dev(struct nullb_device *dev, int null_register_zoned_dev(struct nullb *nullb) { struct request_queue *q = nullb->q; + struct gendisk *disk = nullb->disk; blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); - blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); - nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0); - return blk_revalidate_disk_zones(nullb->disk, NULL); + disk->nr_zones = bdev_nr_zones(disk->part0); + + pr_info("%s: using %s zone append\n", + disk->disk_name, + queue_emulates_zone_append(q) ? "emulated" : "native"); + + return blk_revalidate_disk_zones(disk); } void null_free_zoned_dev(struct nullb_device *dev) @@ -241,35 +241,6 @@ size_t null_zone_valid_read_len(struct nullb *nullb, return (zone->wp - sector) << SECTOR_SHIFT; } -static blk_status_t __null_close_zone(struct nullb_device *dev, - struct nullb_zone *zone) -{ - switch (zone->cond) { - case BLK_ZONE_COND_CLOSED: - /* close operation on closed is not an error */ - return BLK_STS_OK; - case BLK_ZONE_COND_IMP_OPEN: - dev->nr_zones_imp_open--; - break; - case BLK_ZONE_COND_EXP_OPEN: - dev->nr_zones_exp_open--; - break; - case BLK_ZONE_COND_EMPTY: - case BLK_ZONE_COND_FULL: - default: - return BLK_STS_IOERR; - } - - if (zone->wp == zone->start) { - zone->cond = BLK_ZONE_COND_EMPTY; - } else { - zone->cond = BLK_ZONE_COND_CLOSED; - dev->nr_zones_closed++; - } - - return BLK_STS_OK; -} - static void null_close_imp_open_zone(struct nullb_device *dev) { struct nullb_zone *zone; @@ -286,7 +257,13 @@ static void null_close_imp_open_zone(struct nullb_device *dev) zno = dev->zone_nr_conv; if (zone->cond == BLK_ZONE_COND_IMP_OPEN) { - __null_close_zone(dev, zone); + dev->nr_zones_imp_open--; + if (zone->wp == zone->start) { + zone->cond = BLK_ZONE_COND_EMPTY; + } else { + zone->cond = BLK_ZONE_COND_CLOSED; + dev->nr_zones_closed++; + } dev->imp_close_zone_no = zno; return; } @@ -374,73 +351,73 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, null_lock_zone(dev, zone); - if (zone->cond == BLK_ZONE_COND_FULL || - zone->cond == BLK_ZONE_COND_READONLY || - zone->cond == BLK_ZONE_COND_OFFLINE) { - /* Cannot write to the zone */ - ret = BLK_STS_IOERR; - goto unlock; - } - /* - * Regular writes must be at the write pointer position. - * Zone append writes are automatically issued at the write - * pointer and the position returned using the request or BIO - * sector. + * Regular writes must be at the write pointer position. Zone append + * writes are automatically issued at the write pointer and the position + * returned using the request sector. Note that we do not check the zone + * condition because for FULL, READONLY and OFFLINE zones, the sector + * check against the zone write pointer will always result in failing + * the command. */ if (append) { + if (WARN_ON_ONCE(!dev->zone_append_max_sectors) || + zone->wp == NULL_ZONE_INVALID_WP) { + ret = BLK_STS_IOERR; + goto unlock_zone; + } sector = zone->wp; blk_mq_rq_from_pdu(cmd)->__sector = sector; - } else if (sector != zone->wp) { - ret = BLK_STS_IOERR; - goto unlock; } - if (zone->wp + nr_sectors > zone->start + zone->capacity) { + if (sector != zone->wp || + zone->wp + nr_sectors > zone->start + zone->capacity) { ret = BLK_STS_IOERR; - goto unlock; + goto unlock_zone; } if (zone->cond == BLK_ZONE_COND_CLOSED || zone->cond == BLK_ZONE_COND_EMPTY) { - null_lock_zone_res(dev); + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) { - null_unlock_zone_res(dev); - goto unlock; - } - if (zone->cond == BLK_ZONE_COND_CLOSED) { - dev->nr_zones_closed--; - dev->nr_zones_imp_open++; - } else if (zone->cond == BLK_ZONE_COND_EMPTY) { - dev->nr_zones_imp_open++; - } + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + spin_unlock(&dev->zone_res_lock); + goto unlock_zone; + } + if (zone->cond == BLK_ZONE_COND_CLOSED) { + dev->nr_zones_closed--; + dev->nr_zones_imp_open++; + } else if (zone->cond == BLK_ZONE_COND_EMPTY) { + dev->nr_zones_imp_open++; + } - if (zone->cond != BLK_ZONE_COND_EXP_OPEN) - zone->cond = BLK_ZONE_COND_IMP_OPEN; + spin_unlock(&dev->zone_res_lock); + } - null_unlock_zone_res(dev); + zone->cond = BLK_ZONE_COND_IMP_OPEN; } ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); if (ret != BLK_STS_OK) - goto unlock; + goto unlock_zone; zone->wp += nr_sectors; if (zone->wp == zone->start + zone->capacity) { - null_lock_zone_res(dev); - if (zone->cond == BLK_ZONE_COND_EXP_OPEN) - dev->nr_zones_exp_open--; - else if (zone->cond == BLK_ZONE_COND_IMP_OPEN) - dev->nr_zones_imp_open--; + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); + if (zone->cond == BLK_ZONE_COND_EXP_OPEN) + dev->nr_zones_exp_open--; + else if (zone->cond == BLK_ZONE_COND_IMP_OPEN) + dev->nr_zones_imp_open--; + spin_unlock(&dev->zone_res_lock); + } zone->cond = BLK_ZONE_COND_FULL; - null_unlock_zone_res(dev); } ret = BLK_STS_OK; -unlock: +unlock_zone: null_unlock_zone(dev, zone); return ret; @@ -454,54 +431,100 @@ static blk_status_t null_open_zone(struct nullb_device *dev, if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; - null_lock_zone_res(dev); - switch (zone->cond) { case BLK_ZONE_COND_EXP_OPEN: - /* open operation on exp open is not an error */ - goto unlock; + /* Open operation on exp open is not an error */ + return BLK_STS_OK; case BLK_ZONE_COND_EMPTY: - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) - goto unlock; - break; case BLK_ZONE_COND_IMP_OPEN: - dev->nr_zones_imp_open--; - break; case BLK_ZONE_COND_CLOSED: - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) - goto unlock; - dev->nr_zones_closed--; break; case BLK_ZONE_COND_FULL: default: - ret = BLK_STS_IOERR; - goto unlock; + return BLK_STS_IOERR; } - zone->cond = BLK_ZONE_COND_EXP_OPEN; - dev->nr_zones_exp_open++; + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); -unlock: - null_unlock_zone_res(dev); + switch (zone->cond) { + case BLK_ZONE_COND_EMPTY: + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + spin_unlock(&dev->zone_res_lock); + return ret; + } + break; + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_CLOSED: + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + spin_unlock(&dev->zone_res_lock); + return ret; + } + dev->nr_zones_closed--; + break; + default: + break; + } - return ret; + dev->nr_zones_exp_open++; + + spin_unlock(&dev->zone_res_lock); + } + + zone->cond = BLK_ZONE_COND_EXP_OPEN; + + return BLK_STS_OK; } static blk_status_t null_close_zone(struct nullb_device *dev, struct nullb_zone *zone) { - blk_status_t ret; - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; - null_lock_zone_res(dev); - ret = __null_close_zone(dev, zone); - null_unlock_zone_res(dev); + switch (zone->cond) { + case BLK_ZONE_COND_CLOSED: + /* close operation on closed is not an error */ + return BLK_STS_OK; + case BLK_ZONE_COND_IMP_OPEN: + case BLK_ZONE_COND_EXP_OPEN: + break; + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_FULL: + default: + return BLK_STS_IOERR; + } + + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); - return ret; + switch (zone->cond) { + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_EXP_OPEN: + dev->nr_zones_exp_open--; + break; + default: + break; + } + + if (zone->wp > zone->start) + dev->nr_zones_closed++; + + spin_unlock(&dev->zone_res_lock); + } + + if (zone->wp == zone->start) + zone->cond = BLK_ZONE_COND_EMPTY; + else + zone->cond = BLK_ZONE_COND_CLOSED; + + return BLK_STS_OK; } static blk_status_t null_finish_zone(struct nullb_device *dev, @@ -512,41 +535,47 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; - null_lock_zone_res(dev); + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); - switch (zone->cond) { - case BLK_ZONE_COND_FULL: - /* finish operation on full is not an error */ - goto unlock; - case BLK_ZONE_COND_EMPTY: - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) - goto unlock; - break; - case BLK_ZONE_COND_IMP_OPEN: - dev->nr_zones_imp_open--; - break; - case BLK_ZONE_COND_EXP_OPEN: - dev->nr_zones_exp_open--; - break; - case BLK_ZONE_COND_CLOSED: - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) - goto unlock; - dev->nr_zones_closed--; - break; - default: - ret = BLK_STS_IOERR; - goto unlock; + switch (zone->cond) { + case BLK_ZONE_COND_FULL: + /* Finish operation on full is not an error */ + spin_unlock(&dev->zone_res_lock); + return BLK_STS_OK; + case BLK_ZONE_COND_EMPTY: + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + spin_unlock(&dev->zone_res_lock); + return ret; + } + break; + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_EXP_OPEN: + dev->nr_zones_exp_open--; + break; + case BLK_ZONE_COND_CLOSED: + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + spin_unlock(&dev->zone_res_lock); + return ret; + } + dev->nr_zones_closed--; + break; + default: + spin_unlock(&dev->zone_res_lock); + return BLK_STS_IOERR; + } + + spin_unlock(&dev->zone_res_lock); } zone->cond = BLK_ZONE_COND_FULL; zone->wp = zone->start + zone->len; -unlock: - null_unlock_zone_res(dev); - - return ret; + return BLK_STS_OK; } static blk_status_t null_reset_zone(struct nullb_device *dev, @@ -555,34 +584,33 @@ static blk_status_t null_reset_zone(struct nullb_device *dev, if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; - null_lock_zone_res(dev); + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); - switch (zone->cond) { - case BLK_ZONE_COND_EMPTY: - /* reset operation on empty is not an error */ - null_unlock_zone_res(dev); - return BLK_STS_OK; - case BLK_ZONE_COND_IMP_OPEN: - dev->nr_zones_imp_open--; - break; - case BLK_ZONE_COND_EXP_OPEN: - dev->nr_zones_exp_open--; - break; - case BLK_ZONE_COND_CLOSED: - dev->nr_zones_closed--; - break; - case BLK_ZONE_COND_FULL: - break; - default: - null_unlock_zone_res(dev); - return BLK_STS_IOERR; + switch (zone->cond) { + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_EXP_OPEN: + dev->nr_zones_exp_open--; + break; + case BLK_ZONE_COND_CLOSED: + dev->nr_zones_closed--; + break; + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_FULL: + break; + default: + spin_unlock(&dev->zone_res_lock); + return BLK_STS_IOERR; + } + + spin_unlock(&dev->zone_res_lock); } zone->cond = BLK_ZONE_COND_EMPTY; zone->wp = zone->start; - null_unlock_zone_res(dev); - if (dev->memory_backed) return null_handle_discard(dev, zone->start, zone->len); @@ -711,7 +739,7 @@ static void null_set_zone_cond(struct nullb_device *dev, zone->cond != BLK_ZONE_COND_OFFLINE) null_finish_zone(dev, zone); zone->cond = cond; - zone->wp = (sector_t)-1; + zone->wp = NULL_ZONE_INVALID_WP; } null_unlock_zone(dev, zone); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 21728e9ea5c3..8a2ce8070010 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2215,6 +2215,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write) } dev_info(ddev, "%lukB available on disc\n", lba << 1); } + set_blocksize(bdev_file, CD_FRAMESIZE); return 0; @@ -2278,11 +2279,6 @@ static int pkt_open(struct gendisk *disk, blk_mode_t mode) ret = pkt_open_dev(pd, mode & BLK_OPEN_WRITE); if (ret) goto out_dec; - /* - * needed here as well, since ext2 (among others) may change - * the blocksize at mount time - */ - set_blocksize(disk->part0, CD_FRAMESIZE); } mutex_unlock(&ctl_mutex); mutex_unlock(&pktcdvd_mutex); @@ -2526,7 +2522,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) __module_get(THIS_MODULE); pd->bdev_file = bdev_file; - set_blocksize(file_bdev(bdev_file), CD_FRAMESIZE); atomic_set(&pd->cdrw.pending_bios, 0); pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->disk->disk_name); diff --git a/drivers/block/rnbd/rnbd-srv-trace.h b/drivers/block/rnbd/rnbd-srv-trace.h index 8dedf73bdd28..89d0bcb17195 100644 --- a/drivers/block/rnbd/rnbd-srv-trace.h +++ b/drivers/block/rnbd/rnbd-srv-trace.h @@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(rnbd_srv_link_class, TP_fast_assign( __entry->qdepth = srv->queue_depth; - __assign_str(sessname, srv->sessname); + __assign_str(sessname); ), TP_printk("sessname: %s qdepth: %d", @@ -85,7 +85,7 @@ TRACE_EVENT(process_rdma, ), TP_fast_assign( - __assign_str(sessname, srv->sessname); + __assign_str(sessname); __entry->dir = id->dir; __entry->ver = srv->ver; __entry->device_id = le32_to_cpu(msg->device_id); @@ -130,7 +130,7 @@ TRACE_EVENT(process_msg_sess_info, __entry->proto_ver = srv->ver; __entry->clt_ver = msg->ver; __entry->srv_ver = RNBD_PROTO_VER_MAJOR; - __assign_str(sessname, srv->sessname); + __assign_str(sessname); ), TP_printk("Session %s using proto-ver %d (clt-ver: %d, srv-ver: %d)", @@ -165,8 +165,8 @@ TRACE_EVENT(process_msg_open, TP_fast_assign( __entry->access_mode = msg->access_mode; - __assign_str(sessname, srv->sessname); - __assign_str(dev_name, msg->dev_name); + __assign_str(sessname); + __assign_str(dev_name); ), TP_printk("Open message received: session='%s' path='%s' access_mode=%s", @@ -189,7 +189,7 @@ TRACE_EVENT(process_msg_close, TP_fast_assign( __entry->device_id = le32_to_cpu(msg->device_id); - __assign_str(sessname, srv->sessname); + __assign_str(sessname); ), TP_printk("Close message received: session='%s' device id='%d'", diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index bea3d5cf8a83..4e159948c912 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -221,7 +221,7 @@ static int ublk_get_nr_zones(const struct ublk_device *ub) static int ublk_revalidate_disk_zones(struct ublk_device *ub) { - return blk_revalidate_disk_zones(ub->ub_disk, NULL); + return blk_revalidate_disk_zones(ub->ub_disk); } static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) @@ -249,8 +249,7 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) static void ublk_dev_param_zoned_apply(struct ublk_device *ub) { blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue); - blk_queue_required_elevator_features(ub->ub_disk->queue, - ELEVATOR_F_ZBD_SEQ_WRITE); + ub->ub_disk->nr_zones = ublk_get_nr_zones(ub); } @@ -2177,7 +2176,9 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) .max_hw_sectors = p->max_sectors, .chunk_sectors = p->chunk_sectors, .virt_boundary_mask = p->virt_boundary_mask, - + .max_segments = USHRT_MAX, + .max_segment_size = UINT_MAX, + .dma_alignment = 3, }; struct gendisk *disk; int ret = -EINVAL; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 42dea7601d87..2351f411fa46 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1543,7 +1543,7 @@ static int virtblk_probe(struct virtio_device *vdev) */ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) { blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue); - err = blk_revalidate_disk_zones(vblk->disk, NULL); + err = blk_revalidate_disk_zones(vblk->disk); if (err) goto out_cleanup_disk; } @@ -1658,7 +1658,6 @@ static struct virtio_driver virtio_blk = { .feature_table_legacy = features_legacy, .feature_table_size_legacy = ARRAY_SIZE(features_legacy), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtblk_probe, .remove = virtblk_remove, diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f0639df6cd18..3acd7006ad2c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -426,11 +426,10 @@ static void reset_bdev(struct zram *zram) if (!zram->backing_dev) return; - fput(zram->bdev_file); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; - zram->bdev_file = NULL; + zram->bdev = NULL; zram->disk->fops = &zram_devops; kvfree(zram->bitmap); zram->bitmap = NULL; @@ -473,10 +472,8 @@ static ssize_t backing_dev_store(struct device *dev, size_t sz; struct file *backing_dev = NULL; struct inode *inode; - struct address_space *mapping; unsigned int bitmap_sz; unsigned long nr_pages, *bitmap = NULL; - struct file *bdev_file = NULL; int err; struct zram *zram = dev_to_zram(dev); @@ -497,15 +494,14 @@ static ssize_t backing_dev_store(struct device *dev, if (sz > 0 && file_name[sz - 1] == '\n') file_name[sz - 1] = 0x00; - backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); + backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0); if (IS_ERR(backing_dev)) { err = PTR_ERR(backing_dev); backing_dev = NULL; goto out; } - mapping = backing_dev->f_mapping; - inode = mapping->host; + inode = backing_dev->f_mapping->host; /* Support only block device in this moment */ if (!S_ISBLK(inode->i_mode)) { @@ -513,14 +509,6 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - bdev_file = bdev_file_open_by_dev(inode->i_rdev, - BLK_OPEN_READ | BLK_OPEN_WRITE, zram, NULL); - if (IS_ERR(bdev_file)) { - err = PTR_ERR(bdev_file); - bdev_file = NULL; - goto out; - } - nr_pages = i_size_read(inode) >> PAGE_SHIFT; bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); @@ -531,7 +519,7 @@ static ssize_t backing_dev_store(struct device *dev, reset_bdev(zram); - zram->bdev_file = bdev_file; + zram->bdev = I_BDEV(inode); zram->backing_dev = backing_dev; zram->bitmap = bitmap; zram->nr_pages = nr_pages; @@ -544,9 +532,6 @@ static ssize_t backing_dev_store(struct device *dev, out: kvfree(bitmap); - if (bdev_file) - fput(bdev_file); - if (backing_dev) filp_close(backing_dev, NULL); @@ -587,7 +572,7 @@ static void read_from_bdev_async(struct zram *zram, struct page *page, { struct bio *bio; - bio = bio_alloc(file_bdev(zram->bdev_file), 1, parent->bi_opf, GFP_NOIO); + bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); __bio_add_page(bio, page, PAGE_SIZE, 0); bio_chain(bio, parent); @@ -703,7 +688,7 @@ static ssize_t writeback_store(struct device *dev, continue; } - bio_init(&bio, file_bdev(zram->bdev_file), &bio_vec, 1, + bio_init(&bio, zram->bdev, &bio_vec, 1, REQ_OP_WRITE | REQ_SYNC); bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); __bio_add_page(&bio, page, PAGE_SIZE, 0); @@ -785,7 +770,7 @@ static void zram_sync_read(struct work_struct *work) struct bio_vec bv; struct bio bio; - bio_init(&bio, file_bdev(zw->zram->bdev_file), &bv, 1, REQ_OP_READ); + bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ); bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); zw->error = submit_bio_wait(&bio); @@ -1568,7 +1553,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, * Corresponding ZRAM slot should be locked. */ static int zram_recompress(struct zram *zram, u32 index, struct page *page, - u32 threshold, u32 prio, u32 prio_max) + u64 *num_recomp_pages, u32 threshold, u32 prio, + u32 prio_max) { struct zcomp_strm *zstrm = NULL; unsigned long handle_old; @@ -1645,6 +1631,15 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, if (!zstrm) return 0; + /* + * Decrement the limit (if set) on pages we can recompress, even + * when current recompression was unsuccessful or did not compress + * the page below the threshold, because we still spent resources + * on it. + */ + if (*num_recomp_pages) + *num_recomp_pages -= 1; + if (class_index_new >= class_index_old) { /* * Secondary algorithms failed to re-compress the page @@ -1710,6 +1705,7 @@ static ssize_t recompress_store(struct device *dev, struct zram *zram = dev_to_zram(dev); unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; char *args, *param, *val, *algo = NULL; + u64 num_recomp_pages = ULLONG_MAX; u32 mode = 0, threshold = 0; unsigned long index; struct page *page; @@ -1732,6 +1728,17 @@ static ssize_t recompress_store(struct device *dev, continue; } + if (!strcmp(param, "max_pages")) { + /* + * Limit the number of entries (pages) we attempt to + * recompress. + */ + ret = kstrtoull(val, 10, &num_recomp_pages); + if (ret) + return ret; + continue; + } + if (!strcmp(param, "threshold")) { /* * We will re-compress only idle objects equal or @@ -1788,6 +1795,9 @@ static ssize_t recompress_store(struct device *dev, for (index = 0; index < nr_pages; index++) { int err = 0; + if (!num_recomp_pages) + break; + zram_slot_lock(zram, index); if (!zram_allocated(zram, index)) @@ -1807,8 +1817,8 @@ static ssize_t recompress_store(struct device *dev, zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) goto next; - err = zram_recompress(zram, index, page, threshold, - prio, prio_max); + err = zram_recompress(zram, index, page, &num_recomp_pages, + threshold, prio, prio_max); next: zram_slot_unlock(zram, index); if (err) { diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 37bf29f34d26..35e322144629 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -132,7 +132,7 @@ struct zram { spinlock_t wb_limit_lock; bool wb_limit_enable; u64 bd_wb_limit; - struct file *bdev_file; + struct block_device *bdev; unsigned long *bitmap; unsigned long nr_pages; #endif |
