From 6b1d83d274486615cc341e410467a98fd9c27c0a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jun 2018 14:55:19 -0700 Subject: block: Remove bdev_nr_zones() Remove this function since it has no callers. This function was introduced in commit 6cc77e9cb080 ("block: introduce zoned block devices zone write locking"). Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Cc: Christoph Hellwig Cc: Matias Bjorling Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 79226ca8f80f..49a400afb146 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1639,15 +1639,6 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev) return 0; } -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return blk_queue_nr_zones(q); - return 0; -} - static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; -- cgit v1.2.3 From 7c8542b7982264226cf94102950343185869b584 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jun 2018 14:55:20 -0700 Subject: block: Inline blk_queue_nr_zones() Since the implementation of blk_queue_nr_zones() is trivial and since it only has a single caller, inline this function. Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Cc: Matias Bjorling Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 2 +- include/linux/blkdev.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 1c4532e92938..26e1f8e425a8 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -214,7 +214,7 @@ static int queue_zone_wlock_show(void *data, struct seq_file *m) if (!q->seq_zones_wlock) return 0; - for (i = 0; i < blk_queue_nr_zones(q); i++) + for (i = 0; i < q->nr_zones; i++) if (test_bit(i, q->seq_zones_wlock)) seq_printf(m, "%u\n", i); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 49a400afb146..905daa7c647e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -800,11 +800,6 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; } -static inline unsigned int blk_queue_nr_zones(struct request_queue *q) -{ - return q->nr_zones; -} - static inline unsigned int blk_queue_zone_no(struct request_queue *q, sector_t sector) { -- cgit v1.2.3 From 6a5ac9846508ad7d1d23881d9d5add35f2e6ae71 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jun 2018 14:55:21 -0700 Subject: block: Make struct request_queue smaller for CONFIG_BLK_DEV_ZONED=n Exclude zoned block device members from struct request_queue for CONFIG_BLK_DEV_ZONED == n. Avoid breaking the build by only building the code that uses these struct request_queue members if CONFIG_BLK_DEV_ZONED != n. 
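[Note: a minimal sketch, not part of the patch, of the pattern this change relies on. my_report_nr_zones() is a hypothetical caller; once the zoned members are under #ifdef CONFIG_BLK_DEV_ZONED, anything that touches them must itself be compiled out or stubbed, which is what the hunks below do for the debugfs code and the blkdev.h helpers.]

#ifdef CONFIG_BLK_DEV_ZONED
static unsigned int my_report_nr_zones(struct request_queue *q)
{
	return q->nr_zones;	/* member exists only in this configuration */
}
#else
static unsigned int my_report_nr_zones(struct request_queue *q)
{
	return 0;		/* stub: no zoned members to read */
}
#endif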
Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Cc: Matias Bjorling Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/Kconfig | 4 ++++ block/Makefile | 1 + block/blk-mq-debugfs-zoned.c | 24 ++++++++++++++++++++++++ block/blk-mq-debugfs.c | 15 --------------- block/blk-mq-debugfs.h | 9 +++++++++ include/linux/blkdev.h | 6 ++++++ 6 files changed, 44 insertions(+), 15 deletions(-) create mode 100644 block/blk-mq-debugfs-zoned.c (limited to 'include/linux/blkdev.h') diff --git a/block/Kconfig b/block/Kconfig index eb50fd4977c2..dfe7bc770fc9 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -177,6 +177,10 @@ config BLK_DEBUG_FS Unless you are building a kernel for a tiny system, you should say Y here. +config BLK_DEBUG_FS_ZONED + bool + default BLK_DEBUG_FS && BLK_DEV_ZONED + config BLK_SED_OPAL bool "Logic for interfacing with Opal enabled SEDs" ---help--- diff --git a/block/Makefile b/block/Makefile index 6a56303b9925..a8f94cdb75c3 100644 --- a/block/Makefile +++ b/block/Makefile @@ -34,4 +34,5 @@ obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o +obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o diff --git a/block/blk-mq-debugfs-zoned.c b/block/blk-mq-debugfs-zoned.c new file mode 100644 index 000000000000..fb2c82c351e4 --- /dev/null +++ b/block/blk-mq-debugfs-zoned.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 Western Digital Corporation or its affiliates. + * + * This file is released under the GPL. + */ + +#include +#include "blk-mq-debugfs.h" + +int queue_zone_wlock_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + unsigned int i; + + if (!q->seq_zones_wlock) + return 0; + + for (i = 0; i < q->nr_zones; i++) + if (test_bit(i, q->seq_zones_wlock)) + seq_printf(m, "%u\n", i); + + return 0; +} diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 26e1f8e425a8..7efe268e4447 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -206,21 +206,6 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, return count; } -static int queue_zone_wlock_show(void *data, struct seq_file *m) -{ - struct request_queue *q = data; - unsigned int i; - - if (!q->seq_zones_wlock) - return 0; - - for (i = 0; i < q->nr_zones; i++) - if (test_bit(i, q->seq_zones_wlock)) - seq_printf(m, "%u\n", i); - - return 0; -} - static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { { "poll_stat", 0400, queue_poll_stat_show }, { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index b9d366e57097..a9160be12be0 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -80,4 +80,13 @@ static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hc } #endif +#ifdef CONFIG_BLK_DEBUG_FS_ZONED +int queue_zone_wlock_show(void *data, struct seq_file *m); +#else +static inline int queue_zone_wlock_show(void *data, struct seq_file *m) +{ + return 0; +} +#endif + #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 905daa7c647e..ca5a8b046894 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -592,6 +592,7 @@ struct request_queue { struct queue_limits limits; +#ifdef CONFIG_BLK_DEV_ZONED /* * Zoned block device information for request dispatch control. 
* nr_zones is the total number of zones of the device. This is always @@ -612,6 +613,7 @@ struct request_queue { unsigned int nr_zones; unsigned long *seq_zones_bitmap; unsigned long *seq_zones_wlock; +#endif /* CONFIG_BLK_DEV_ZONED */ /* * sg stuff @@ -800,6 +802,7 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; } +#ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_queue_zone_no(struct request_queue *q, sector_t sector) { @@ -815,6 +818,7 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q, return false; return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap); } +#endif /* CONFIG_BLK_DEV_ZONED */ static inline bool rq_is_sync(struct request *rq) { @@ -1065,6 +1069,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; } +#ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_rq_zone_no(struct request *rq) { return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); @@ -1074,6 +1079,7 @@ static inline unsigned int blk_rq_zone_is_seq(struct request *rq) { return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); } +#endif /* CONFIG_BLK_DEV_ZONED */ /* * Some commands like WRITE SAME have a payload or data transfer size which -- cgit v1.2.3 From 97889f9ac24f8d2fc8e703ea7f80c162bab10d4d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Jun 2018 19:31:48 +0800 Subject: blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set() We have to remove synchronize_rcu() from blk_queue_cleanup(), otherwise long delay can be caused during lun probe. For removing it, we have to avoid to iterate the set->tag_list in IO path, eg, blk_mq_sched_restart(). This patch reverts 5b79413946d (Revert "blk-mq: don't handle TAG_SHARED in restart"). Given we have fixed enough IO hang issue, and there isn't any reason to restart all queues in one tags any more, see the following reasons: 1) blk-mq core can deal with shared-tags case well via blk_mq_get_driver_tag(), which can wake up queues waiting for driver tag. 2) SCSI is a bit special because it may return BLK_STS_RESOURCE if queue, target or host is ready, but SCSI built-in restart can cover all these well, see scsi_end_request(), queue will be rerun after any request initiated from this host/target is completed. In my test on scsi_debug(8 luns), this patch may improve IOPS by 20% ~ 30% when running I/O on these 8 luns concurrently. Fixes: 705cda97ee3a ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list") Cc: Omar Sandoval Cc: Bart Van Assche Cc: Christoph Hellwig Cc: Martin K. 
Petersen Cc: linux-scsi@vger.kernel.org Reported-by: Andrew Jones Tested-by: Andrew Jones Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 85 +++----------------------------------------------- block/blk-mq.c | 10 ++---- include/linux/blkdev.h | 2 -- 3 files changed, 7 insertions(+), 90 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 56c493c6cd90..4e027f6108ae 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -59,29 +59,16 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) return; - if (hctx->flags & BLK_MQ_F_TAG_SHARED) { - struct request_queue *q = hctx->queue; - - if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - atomic_inc(&q->shared_hctx_restart); - } else - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); } -static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) +void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) { if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - return false; - - if (hctx->flags & BLK_MQ_F_TAG_SHARED) { - struct request_queue *q = hctx->queue; - - if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - atomic_dec(&q->shared_hctx_restart); - } else - clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + return; + clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - return blk_mq_run_hw_queue(hctx, true); + blk_mq_run_hw_queue(hctx, true); } /* @@ -380,68 +367,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, return false; } -/** - * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list - * @pos: loop cursor. - * @skip: the list element that will not be examined. Iteration starts at - * @skip->next. - * @head: head of the list to examine. This list must have at least one - * element, namely @skip. - * @member: name of the list_head structure within typeof(*pos). - */ -#define list_for_each_entry_rcu_rr(pos, skip, head, member) \ - for ((pos) = (skip); \ - (pos = (pos)->member.next != (head) ? list_entry_rcu( \ - (pos)->member.next, typeof(*pos), member) : \ - list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \ - (pos) != (skip); ) - -/* - * Called after a driver tag has been freed to check whether a hctx needs to - * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware - * queues in a round-robin fashion if the tag set of @hctx is shared with other - * hardware queues. - */ -void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) -{ - struct blk_mq_tags *const tags = hctx->tags; - struct blk_mq_tag_set *const set = hctx->queue->tag_set; - struct request_queue *const queue = hctx->queue, *q; - struct blk_mq_hw_ctx *hctx2; - unsigned int i, j; - - if (set->flags & BLK_MQ_F_TAG_SHARED) { - /* - * If this is 0, then we know that no hardware queues - * have RESTART marked. We're done. 
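 * (Note, editorial and not part of the patch: this whole round-robin
 * restart path is being deleted. After this commit each hctx is
 * restarted directly in blk_mq_sched_restart(); fairness for shared
 * tags is left to the wakeups done by blk_mq_get_driver_tag() and, for
 * SCSI, to the built-in restart in scsi_end_request(), as the commit
 * message above explains.)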
- */ - if (!atomic_read(&queue->shared_hctx_restart)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu_rr(q, queue, &set->tag_list, - tag_set_list) { - queue_for_each_hw_ctx(q, hctx2, i) - if (hctx2->tags == tags && - blk_mq_sched_restart_hctx(hctx2)) - goto done; - } - j = hctx->queue_num + 1; - for (i = 0; i < queue->nr_hw_queues; i++, j++) { - if (j == queue->nr_hw_queues) - j = 0; - hctx2 = queue->queue_hw_ctx[j]; - if (hctx2->tags == tags && - blk_mq_sched_restart_hctx(hctx2)) - break; - } -done: - rcu_read_unlock(); - } else { - blk_mq_sched_restart_hctx(hctx); - } -} - void blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue, bool async) { diff --git a/block/blk-mq.c b/block/blk-mq.c index df84281f6af6..7c6ff13171ef 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2335,15 +2335,10 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) int i; queue_for_each_hw_ctx(q, hctx, i) { - if (shared) { - if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - atomic_inc(&q->shared_hctx_restart); + if (shared) hctx->flags |= BLK_MQ_F_TAG_SHARED; - } else { - if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - atomic_dec(&q->shared_hctx_restart); + else hctx->flags &= ~BLK_MQ_F_TAG_SHARED; - } } } @@ -2374,7 +2369,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) blk_mq_update_tag_set_depth(set, false); } mutex_unlock(&set->tag_list_lock); - synchronize_rcu(); INIT_LIST_HEAD(&q->tag_set_list); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ca5a8b046894..9d05646d5059 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -442,8 +442,6 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ - atomic_t shared_hctx_restart; - struct blk_queue_stats *stats; struct rq_wb *rq_wb; -- cgit v1.2.3 From a79050434b45959f397042080fd1d70ffa9bd9df Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Jul 2018 09:32:35 -0600 Subject: blk-rq-qos: refactor out common elements of blk-wbt blkcg-qos is going to do essentially what wbt does, only on a cgroup basis. Break out the common code that will be shared between blkcg-qos and wbt into blk-rq-qos.* so they can both utilize the same infrastructure. 
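[Note: a minimal sketch, not part of the patch, of how a policy hooks into the rq_qos chain this refactoring introduces. The policy itself (struct my_qos and the my_qos_* functions) is hypothetical; struct rq_qos, struct rq_qos_ops, rq_qos_add() and the RQ_QOS_CGROUP id are the ones added in blk-rq-qos.h below. On teardown, rq_qos_exit() walks the chain and invokes each policy's ->exit().]

#include <linux/slab.h>
#include "blk-rq-qos.h"

struct my_qos {
	struct rq_qos rqos;	/* embedded so container_of() can recover us */
	atomic_t completed;	/* hypothetical per-queue statistic */
};

static void my_qos_done(struct rq_qos *rqos, struct request *rq)
{
	struct my_qos *mq = container_of(rqos, struct my_qos, rqos);

	atomic_inc(&mq->completed);	/* runs via rq_qos_done() on completion */
}

static void my_qos_exit(struct rq_qos *rqos)
{
	kfree(container_of(rqos, struct my_qos, rqos));
}

static struct rq_qos_ops my_qos_ops = {
	.done	= my_qos_done,
	.exit	= my_qos_exit,
};

int my_qos_init(struct request_queue *q)
{
	struct my_qos *mq = kzalloc(sizeof(*mq), GFP_KERNEL);

	if (!mq)
		return -ENOMEM;
	mq->rqos.id = RQ_QOS_CGROUP;	/* hypothetical: reuses an existing id */
	mq->rqos.ops = &my_qos_ops;
	mq->rqos.q = q;
	rq_qos_add(q, &mq->rqos);	/* push onto the q->rq_qos list */
	return 0;
}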
Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- block/Makefile | 2 +- block/blk-core.c | 12 +- block/blk-mq.c | 12 +- block/blk-rq-qos.c | 178 +++++++++++++++++++++++++++ block/blk-rq-qos.h | 106 ++++++++++++++++ block/blk-settings.c | 4 +- block/blk-sysfs.c | 22 ++-- block/blk-wbt.c | 326 ++++++++++++++++++++++--------------------------- block/blk-wbt.h | 63 ++++------ include/linux/blkdev.h | 4 +- 10 files changed, 478 insertions(+), 251 deletions(-) create mode 100644 block/blk-rq-qos.c create mode 100644 block/blk-rq-qos.h (limited to 'include/linux/blkdev.h') diff --git a/block/Makefile b/block/Makefile index a8f94cdb75c3..57d0f47ab05f 100644 --- a/block/Makefile +++ b/block/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ genhd.o partition-generic.o ioprio.o \ - badblocks.o partitions/ + badblocks.o partitions/ blk-rq-qos.o obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o diff --git a/block/blk-core.c b/block/blk-core.c index 2ff8e131a892..b33a73bcf2d0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1645,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) blk_delete_timer(rq); blk_clear_rq_complete(rq); trace_block_rq_requeue(q, rq); - wbt_requeue(q->rq_wb, rq); + rq_qos_requeue(q, rq); if (rq->rq_flags & RQF_QUEUED) blk_queue_end_tag(q, rq); @@ -1752,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) /* this is a bio leak */ WARN_ON(req->bio != NULL); - wbt_done(q->rq_wb, req); + rq_qos_done(q, req); /* * Request may not have originated from ll_rw_blk. if not, @@ -2044,7 +2044,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) } get_rq: - wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock); + wb_acct = rq_qos_throttle(q, bio, q->queue_lock); /* * Grab a free request. This is might sleep but can not fail. 
@@ -2054,7 +2054,7 @@ get_rq: req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO); if (IS_ERR(req)) { blk_queue_exit(q); - __wbt_done(q->rq_wb, wb_acct); + rq_qos_cleanup(q, wb_acct); if (PTR_ERR(req) == -ENOMEM) bio->bi_status = BLK_STS_RESOURCE; else @@ -2983,7 +2983,7 @@ void blk_start_request(struct request *req) req->throtl_size = blk_rq_sectors(req); #endif req->rq_flags |= RQF_STATS; - wbt_issue(req->q->rq_wb, req); + rq_qos_issue(req->q, req); } BUG_ON(blk_rq_is_complete(req)); @@ -3207,7 +3207,7 @@ void blk_finish_request(struct request *req, blk_status_t error) blk_account_io_done(req, now); if (req->end_io) { - wbt_done(req->q->rq_wb, req); + rq_qos_done(q, req); req->end_io(req, error); } else { if (blk_bidi_rq(req)) diff --git a/block/blk-mq.c b/block/blk-mq.c index 850fdd02c385..ea2a226457fa 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -504,7 +504,7 @@ void blk_mq_free_request(struct request *rq) if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->backing_dev_info); - wbt_done(q->rq_wb, rq); + rq_qos_done(q, rq); if (blk_rq_rl(rq)) blk_put_rl(blk_rq_rl(rq)); @@ -527,7 +527,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) blk_account_io_done(rq, now); if (rq->end_io) { - wbt_done(rq->q->rq_wb, rq); + rq_qos_done(rq->q, rq); rq->end_io(rq, error); } else { if (unlikely(blk_bidi_rq(rq))) @@ -641,7 +641,7 @@ void blk_mq_start_request(struct request *rq) rq->throtl_size = blk_rq_sectors(rq); #endif rq->rq_flags |= RQF_STATS; - wbt_issue(q->rq_wb, rq); + rq_qos_issue(q, rq); } WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE); @@ -667,7 +667,7 @@ static void __blk_mq_requeue_request(struct request *rq) blk_mq_put_driver_tag(rq); trace_block_rq_requeue(q, rq); - wbt_requeue(q->rq_wb, rq); + rq_qos_requeue(q, rq); if (blk_mq_request_started(rq)) { WRITE_ONCE(rq->state, MQ_RQ_IDLE); @@ -1806,13 +1806,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (blk_mq_sched_bio_merge(q, bio)) return BLK_QC_T_NONE; - wb_acct = wbt_wait(q->rq_wb, bio, NULL); + wb_acct = rq_qos_throttle(q, bio, NULL); trace_block_getrq(q, bio, bio->bi_opf); rq = blk_mq_get_request(q, bio, bio->bi_opf, &data); if (unlikely(!rq)) { - __wbt_done(q->rq_wb, wb_acct); + rq_qos_cleanup(q, wb_acct); if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); return BLK_QC_T_NONE; diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c new file mode 100644 index 000000000000..d2f2af8aa10c --- /dev/null +++ b/block/blk-rq-qos.c @@ -0,0 +1,178 @@ +#include "blk-rq-qos.h" + +#include "blk-wbt.h" + +/* + * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, + * false if 'v' + 1 would be bigger than 'below'. 
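 * (Note, editorial and not part of the patch: this helper is a
 * lock-free compare-and-swap loop; it re-reads the counter and retries
 * atomic_cmpxchg() until either the increment lands or the limit is
 * reached. rq_wait_inc_below() below wraps it to cap the number of
 * in-flight requests against a scaled depth limit.)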
+ */ +static bool atomic_inc_below(atomic_t *v, int below) +{ + int cur = atomic_read(v); + + for (;;) { + int old; + + if (cur >= below) + return false; + old = atomic_cmpxchg(v, cur, cur + 1); + if (old == cur) + break; + cur = old; + } + + return true; +} + +bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit) +{ + return atomic_inc_below(&rq_wait->inflight, limit); +} + +void rq_qos_cleanup(struct request_queue *q, enum wbt_flags wb_acct) +{ + struct rq_qos *rqos; + + for (rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->ops->cleanup) + rqos->ops->cleanup(rqos, wb_acct); + } +} + +void rq_qos_done(struct request_queue *q, struct request *rq) +{ + struct rq_qos *rqos; + + for (rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->ops->done) + rqos->ops->done(rqos, rq); + } +} + +void rq_qos_issue(struct request_queue *q, struct request *rq) +{ + struct rq_qos *rqos; + + for(rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->ops->issue) + rqos->ops->issue(rqos, rq); + } +} + +void rq_qos_requeue(struct request_queue *q, struct request *rq) +{ + struct rq_qos *rqos; + + for(rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->ops->requeue) + rqos->ops->requeue(rqos, rq); + } +} + +enum wbt_flags rq_qos_throttle(struct request_queue *q, struct bio *bio, + spinlock_t *lock) +{ + struct rq_qos *rqos; + enum wbt_flags flags = 0; + + for(rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->ops->throttle) + flags |= rqos->ops->throttle(rqos, bio, lock); + } + return flags; +} + +/* + * Return true, if we can't increase the depth further by scaling + */ +bool rq_depth_calc_max_depth(struct rq_depth *rqd) +{ + unsigned int depth; + bool ret = false; + + /* + * For QD=1 devices, this is a special case. It's important for those + * to have one request ready when one completes, so force a depth of + * 2 for those devices. On the backend, it'll be a depth of 1 anyway, + * since the device can't have more than that in flight. If we're + * scaling down, then keep a setting of 1/1/1. + */ + if (rqd->queue_depth == 1) { + if (rqd->scale_step > 0) + rqd->max_depth = 1; + else { + rqd->max_depth = 2; + ret = true; + } + } else { + /* + * scale_step == 0 is our default state. If we have suffered + * latency spikes, step will be > 0, and we shrink the + * allowed write depths. If step is < 0, we're only doing + * writes, and we allow a temporarily higher depth to + * increase performance. + */ + depth = min_t(unsigned int, rqd->default_depth, + rqd->queue_depth); + if (rqd->scale_step > 0) + depth = 1 + ((depth - 1) >> min(31, rqd->scale_step)); + else if (rqd->scale_step < 0) { + unsigned int maxd = 3 * rqd->queue_depth / 4; + + depth = 1 + ((depth - 1) << -rqd->scale_step); + if (depth > maxd) { + depth = maxd; + ret = true; + } + } + + rqd->max_depth = depth; + } + + return ret; +} + +void rq_depth_scale_up(struct rq_depth *rqd) +{ + /* + * Hit max in previous round, stop here + */ + if (rqd->scaled_max) + return; + + rqd->scale_step--; + + rqd->scaled_max = rq_depth_calc_max_depth(rqd); +} + +/* + * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we + * had a latency violation. + */ +void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) +{ + /* + * Stop scaling down when we've hit the limit. This also prevents + * ->scale_step from going to crazy values, if the device can't + * keep up. 
+ */ + if (rqd->max_depth == 1) + return; + + if (rqd->scale_step < 0 && hard_throttle) + rqd->scale_step = 0; + else + rqd->scale_step++; + + rqd->scaled_max = false; + rq_depth_calc_max_depth(rqd); +} + +void rq_qos_exit(struct request_queue *q) +{ + while (q->rq_qos) { + struct rq_qos *rqos = q->rq_qos; + q->rq_qos = rqos->next; + rqos->ops->exit(rqos); + } +} diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h new file mode 100644 index 000000000000..f9a39bd6ece3 --- /dev/null +++ b/block/blk-rq-qos.h @@ -0,0 +1,106 @@ +#ifndef RQ_QOS_H +#define RQ_QOS_H + +#include +#include +#include +#include +#include + +enum rq_qos_id { + RQ_QOS_WBT, + RQ_QOS_CGROUP, +}; + +struct rq_wait { + wait_queue_head_t wait; + atomic_t inflight; +}; + +struct rq_qos { + struct rq_qos_ops *ops; + struct request_queue *q; + enum rq_qos_id id; + struct rq_qos *next; +}; + +struct rq_qos_ops { + enum wbt_flags (*throttle)(struct rq_qos *, struct bio *, + spinlock_t *); + void (*issue)(struct rq_qos *, struct request *); + void (*requeue)(struct rq_qos *, struct request *); + void (*done)(struct rq_qos *, struct request *); + void (*cleanup)(struct rq_qos *, enum wbt_flags); + void (*exit)(struct rq_qos *); +}; + +struct rq_depth { + unsigned int max_depth; + + int scale_step; + bool scaled_max; + + unsigned int queue_depth; + unsigned int default_depth; +}; + +static inline struct rq_qos *rq_qos_id(struct request_queue *q, + enum rq_qos_id id) +{ + struct rq_qos *rqos; + for (rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->id == id) + break; + } + return rqos; +} + +static inline struct rq_qos *wbt_rq_qos(struct request_queue *q) +{ + return rq_qos_id(q, RQ_QOS_WBT); +} + +static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q) +{ + return rq_qos_id(q, RQ_QOS_CGROUP); +} + +static inline void rq_wait_init(struct rq_wait *rq_wait) +{ + atomic_set(&rq_wait->inflight, 0); + init_waitqueue_head(&rq_wait->wait); +} + +static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) +{ + rqos->next = q->rq_qos; + q->rq_qos = rqos; +} + +static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) +{ + struct rq_qos *cur, *prev = NULL; + for (cur = q->rq_qos; cur; cur = cur->next) { + if (cur == rqos) { + if (prev) + prev->next = rqos->next; + else + q->rq_qos = cur; + break; + } + prev = cur; + } +} + +bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit); +void rq_depth_scale_up(struct rq_depth *rqd); +void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle); +bool rq_depth_calc_max_depth(struct rq_depth *rqd); + +void rq_qos_cleanup(struct request_queue *, enum wbt_flags); +void rq_qos_done(struct request_queue *, struct request *); +void rq_qos_issue(struct request_queue *, struct request *); +void rq_qos_requeue(struct request_queue *, struct request *); +enum wbt_flags rq_qos_throttle(struct request_queue *, struct bio *, spinlock_t *); +void rq_qos_exit(struct request_queue *); +#endif diff --git a/block/blk-settings.c b/block/blk-settings.c index d1de71124656..053de87d1fda 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -875,7 +875,7 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); void blk_set_queue_depth(struct request_queue *q, unsigned int depth) { q->queue_depth = depth; - wbt_set_queue_depth(q->rq_wb, depth); + wbt_set_queue_depth(q, depth); } EXPORT_SYMBOL(blk_set_queue_depth); @@ -900,7 +900,7 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) queue_flag_clear(QUEUE_FLAG_FUA, q); 
spin_unlock_irq(q->queue_lock); - wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); + wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); } EXPORT_SYMBOL_GPL(blk_queue_write_cache); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 94987b1f69e1..49c29a5d06bb 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -422,16 +422,16 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page, static ssize_t queue_wb_lat_show(struct request_queue *q, char *page) { - if (!q->rq_wb) + if (!wbt_rq_qos(q)) return -EINVAL; - return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000)); + return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000)); } static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, size_t count) { - struct rq_wb *rwb; + struct rq_qos *rqos; ssize_t ret; s64 val; @@ -441,23 +441,21 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, if (val < -1) return -EINVAL; - rwb = q->rq_wb; - if (!rwb) { + rqos = wbt_rq_qos(q); + if (!rqos) { ret = wbt_init(q); if (ret) return ret; } - rwb = q->rq_wb; if (val == -1) - rwb->min_lat_nsec = wbt_default_latency_nsec(q); + val = wbt_default_latency_nsec(q); else if (val >= 0) - rwb->min_lat_nsec = val * 1000ULL; + val *= 1000ULL; - if (rwb->enable_state == WBT_STATE_ON_DEFAULT) - rwb->enable_state = WBT_STATE_ON_MANUAL; + wbt_set_min_lat(q, val); - wbt_update_limits(rwb); + wbt_update_limits(q); return count; } @@ -964,7 +962,7 @@ void blk_unregister_queue(struct gendisk *disk) kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); - wbt_exit(q); + rq_qos_exit(q); mutex_lock(&q->sysfs_lock); if (q->request_fn || (q->mq_ops && q->elevator)) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 4f89b28fa652..6fe20fb823e4 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -25,6 +25,7 @@ #include #include "blk-wbt.h" +#include "blk-rq-qos.h" #define CREATE_TRACE_POINTS #include @@ -78,28 +79,6 @@ static inline bool rwb_enabled(struct rq_wb *rwb) return rwb && rwb->wb_normal != 0; } -/* - * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, - * false if 'v' + 1 would be bigger than 'below'. - */ -static bool atomic_inc_below(atomic_t *v, int below) -{ - int cur = atomic_read(v); - - for (;;) { - int old; - - if (cur >= below) - return false; - old = atomic_cmpxchg(v, cur, cur + 1); - if (old == cur) - break; - cur = old; - } - - return true; -} - static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) { if (rwb_enabled(rwb)) { @@ -116,7 +95,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) */ static bool wb_recent_wait(struct rq_wb *rwb) { - struct bdi_writeback *wb = &rwb->queue->backing_dev_info->wb; + struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb; return time_before(jiffies, wb->dirty_sleep + HZ); } @@ -144,8 +123,9 @@ static void rwb_wake_all(struct rq_wb *rwb) } } -void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct) +static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) { + struct rq_wb *rwb = RQWB(rqos); struct rq_wait *rqw; int inflight, limit; @@ -194,10 +174,9 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct) * Called on completion of a request. Note that it's also called when * a request is merged, when the request gets freed. 
*/ -void wbt_done(struct rq_wb *rwb, struct request *rq) +static void wbt_done(struct rq_qos *rqos, struct request *rq) { - if (!rwb) - return; + struct rq_wb *rwb = RQWB(rqos); if (!wbt_is_tracked(rq)) { if (rwb->sync_cookie == rq) { @@ -209,72 +188,11 @@ void wbt_done(struct rq_wb *rwb, struct request *rq) wb_timestamp(rwb, &rwb->last_comp); } else { WARN_ON_ONCE(rq == rwb->sync_cookie); - __wbt_done(rwb, wbt_flags(rq)); + __wbt_done(rqos, wbt_flags(rq)); } wbt_clear_state(rq); } -/* - * Return true, if we can't increase the depth further by scaling - */ -static bool calc_wb_limits(struct rq_wb *rwb) -{ - unsigned int depth; - bool ret = false; - - if (!rwb->min_lat_nsec) { - rwb->wb_max = rwb->wb_normal = rwb->wb_background = 0; - return false; - } - - /* - * For QD=1 devices, this is a special case. It's important for those - * to have one request ready when one completes, so force a depth of - * 2 for those devices. On the backend, it'll be a depth of 1 anyway, - * since the device can't have more than that in flight. If we're - * scaling down, then keep a setting of 1/1/1. - */ - if (rwb->queue_depth == 1) { - if (rwb->scale_step > 0) - rwb->wb_max = rwb->wb_normal = 1; - else { - rwb->wb_max = rwb->wb_normal = 2; - ret = true; - } - rwb->wb_background = 1; - } else { - /* - * scale_step == 0 is our default state. If we have suffered - * latency spikes, step will be > 0, and we shrink the - * allowed write depths. If step is < 0, we're only doing - * writes, and we allow a temporarily higher depth to - * increase performance. - */ - depth = min_t(unsigned int, RWB_DEF_DEPTH, rwb->queue_depth); - if (rwb->scale_step > 0) - depth = 1 + ((depth - 1) >> min(31, rwb->scale_step)); - else if (rwb->scale_step < 0) { - unsigned int maxd = 3 * rwb->queue_depth / 4; - - depth = 1 + ((depth - 1) << -rwb->scale_step); - if (depth > maxd) { - depth = maxd; - ret = true; - } - } - - /* - * Set our max/normal/bg queue depths based on how far - * we have scaled down (->scale_step). 
- */ - rwb->wb_max = depth; - rwb->wb_normal = (rwb->wb_max + 1) / 2; - rwb->wb_background = (rwb->wb_max + 3) / 4; - } - - return ret; -} - static inline bool stat_sample_valid(struct blk_rq_stat *stat) { /* @@ -307,7 +225,8 @@ enum { static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) { - struct backing_dev_info *bdi = rwb->queue->backing_dev_info; + struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info; + struct rq_depth *rqd = &rwb->rq_depth; u64 thislat; /* @@ -351,7 +270,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) return LAT_EXCEEDED; } - if (rwb->scale_step) + if (rqd->scale_step) trace_wbt_stat(bdi, stat); return LAT_OK; @@ -359,58 +278,48 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) static void rwb_trace_step(struct rq_wb *rwb, const char *msg) { - struct backing_dev_info *bdi = rwb->queue->backing_dev_info; + struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info; + struct rq_depth *rqd = &rwb->rq_depth; - trace_wbt_step(bdi, msg, rwb->scale_step, rwb->cur_win_nsec, - rwb->wb_background, rwb->wb_normal, rwb->wb_max); + trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec, + rwb->wb_background, rwb->wb_normal, rqd->max_depth); } -static void scale_up(struct rq_wb *rwb) +static void calc_wb_limits(struct rq_wb *rwb) { - /* - * Hit max in previous round, stop here - */ - if (rwb->scaled_max) - return; + if (rwb->min_lat_nsec == 0) { + rwb->wb_normal = rwb->wb_background = 0; + } else if (rwb->rq_depth.max_depth <= 2) { + rwb->wb_normal = rwb->rq_depth.max_depth; + rwb->wb_background = 1; + } else { + rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2; + rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4; + } +} - rwb->scale_step--; +static void scale_up(struct rq_wb *rwb) +{ + rq_depth_scale_up(&rwb->rq_depth); + calc_wb_limits(rwb); rwb->unknown_cnt = 0; - - rwb->scaled_max = calc_wb_limits(rwb); - - rwb_wake_all(rwb); - - rwb_trace_step(rwb, "step up"); + rwb_trace_step(rwb, "scale up"); } -/* - * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we - * had a latency violation. - */ static void scale_down(struct rq_wb *rwb, bool hard_throttle) { - /* - * Stop scaling down when we've hit the limit. This also prevents - * ->scale_step from going to crazy values, if the device can't - * keep up. - */ - if (rwb->wb_max == 1) - return; - - if (rwb->scale_step < 0 && hard_throttle) - rwb->scale_step = 0; - else - rwb->scale_step++; - - rwb->scaled_max = false; - rwb->unknown_cnt = 0; + rq_depth_scale_down(&rwb->rq_depth, hard_throttle); calc_wb_limits(rwb); - rwb_trace_step(rwb, "step down"); + rwb->unknown_cnt = 0; + rwb_wake_all(rwb); + rwb_trace_step(rwb, "scale down"); } static void rwb_arm_timer(struct rq_wb *rwb) { - if (rwb->scale_step > 0) { + struct rq_depth *rqd = &rwb->rq_depth; + + if (rqd->scale_step > 0) { /* * We should speed this up, using some variant of a fast * integer inverse square root calculation. Since we only do @@ -418,7 +327,7 @@ static void rwb_arm_timer(struct rq_wb *rwb) * though. 
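 * (Note, editorial and not part of the patch: the expression below is
 * win_nsec / sqrt(scale_step + 1) in fixed point, since win_nsec << 4
 * multiplies by 16 and int_sqrt((scale_step + 1) << 8) evaluates to
 * 16 * sqrt(scale_step + 1); deeper throttling therefore samples with
 * a shorter window.)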
*/ rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4, - int_sqrt((rwb->scale_step + 1) << 8)); + int_sqrt((rqd->scale_step + 1) << 8)); } else { /* * For step < 0, we don't want to increase/decrease the @@ -433,12 +342,13 @@ static void rwb_arm_timer(struct rq_wb *rwb) static void wb_timer_fn(struct blk_stat_callback *cb) { struct rq_wb *rwb = cb->data; + struct rq_depth *rqd = &rwb->rq_depth; unsigned int inflight = wbt_inflight(rwb); int status; status = latency_exceeded(rwb, cb->stat); - trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step, + trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step, inflight); /* @@ -469,9 +379,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb) * currently don't have a valid read/write sample. For that * case, slowly return to center state (step == 0). */ - if (rwb->scale_step > 0) + if (rqd->scale_step > 0) scale_up(rwb); - else if (rwb->scale_step < 0) + else if (rqd->scale_step < 0) scale_down(rwb, false); break; default: @@ -481,19 +391,50 @@ static void wb_timer_fn(struct blk_stat_callback *cb) /* * Re-arm timer, if we have IO in flight */ - if (rwb->scale_step || inflight) + if (rqd->scale_step || inflight) rwb_arm_timer(rwb); } -void wbt_update_limits(struct rq_wb *rwb) +static void __wbt_update_limits(struct rq_wb *rwb) { - rwb->scale_step = 0; - rwb->scaled_max = false; + struct rq_depth *rqd = &rwb->rq_depth; + + rqd->scale_step = 0; + rqd->scaled_max = false; + + rq_depth_calc_max_depth(rqd); calc_wb_limits(rwb); rwb_wake_all(rwb); } +void wbt_update_limits(struct request_queue *q) +{ + struct rq_qos *rqos = wbt_rq_qos(q); + if (!rqos) + return; + __wbt_update_limits(RQWB(rqos)); +} + +u64 wbt_get_min_lat(struct request_queue *q) +{ + struct rq_qos *rqos = wbt_rq_qos(q); + if (!rqos) + return 0; + return RQWB(rqos)->min_lat_nsec; +} + +void wbt_set_min_lat(struct request_queue *q, u64 val) +{ + struct rq_qos *rqos = wbt_rq_qos(q); + if (!rqos) + return; + RQWB(rqos)->min_lat_nsec = val; + RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL; + __wbt_update_limits(RQWB(rqos)); +} + + static bool close_io(struct rq_wb *rwb) { const unsigned long now = jiffies; @@ -520,7 +461,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) * IO for a bit. */ if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd()) - limit = rwb->wb_max; + limit = rwb->rq_depth.max_depth; else if ((rw & REQ_BACKGROUND) || close_io(rwb)) { /* * If less than 100ms since we completed unrelated IO, @@ -554,7 +495,7 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw, rqw->wait.head.next != &wait->entry) return false; - return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw)); + return rq_wait_inc_below(rqw, get_limit(rwb, rw)); } /* @@ -614,8 +555,10 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) * in an irq held spinlock, if it holds one when calling this function. * If we do sleep, we'll release and re-grab it. 
*/ -enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) +static enum wbt_flags wbt_wait(struct rq_qos *rqos, struct bio *bio, + spinlock_t *lock) { + struct rq_wb *rwb = RQWB(rqos); enum wbt_flags ret = 0; if (!rwb_enabled(rwb)) @@ -643,8 +586,10 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) return ret | WBT_TRACKED; } -void wbt_issue(struct rq_wb *rwb, struct request *rq) +void wbt_issue(struct rq_qos *rqos, struct request *rq) { + struct rq_wb *rwb = RQWB(rqos); + if (!rwb_enabled(rwb)) return; @@ -661,8 +606,9 @@ void wbt_issue(struct rq_wb *rwb, struct request *rq) } } -void wbt_requeue(struct rq_wb *rwb, struct request *rq) +void wbt_requeue(struct rq_qos *rqos, struct request *rq) { + struct rq_wb *rwb = RQWB(rqos); if (!rwb_enabled(rwb)) return; if (rq == rwb->sync_cookie) { @@ -671,39 +617,30 @@ void wbt_requeue(struct rq_wb *rwb, struct request *rq) } } -void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth) +void wbt_set_queue_depth(struct request_queue *q, unsigned int depth) { - if (rwb) { - rwb->queue_depth = depth; - wbt_update_limits(rwb); + struct rq_qos *rqos = wbt_rq_qos(q); + if (rqos) { + RQWB(rqos)->rq_depth.queue_depth = depth; + __wbt_update_limits(RQWB(rqos)); } } -void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on) -{ - if (rwb) - rwb->wc = write_cache_on; -} - -/* - * Disable wbt, if enabled by default. - */ -void wbt_disable_default(struct request_queue *q) +void wbt_set_write_cache(struct request_queue *q, bool write_cache_on) { - struct rq_wb *rwb = q->rq_wb; - - if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT) - wbt_exit(q); + struct rq_qos *rqos = wbt_rq_qos(q); + if (rqos) + RQWB(rqos)->wc = write_cache_on; } -EXPORT_SYMBOL_GPL(wbt_disable_default); /* * Enable wbt if defaults are configured that way */ void wbt_enable_default(struct request_queue *q) { + struct rq_qos *rqos = wbt_rq_qos(q); /* Throttling already enabled? */ - if (q->rq_wb) + if (rqos) return; /* Queue not registered? Maybe shutting down... */ @@ -741,6 +678,41 @@ static int wbt_data_dir(const struct request *rq) return -1; } +static void wbt_exit(struct rq_qos *rqos) +{ + struct rq_wb *rwb = RQWB(rqos); + struct request_queue *q = rqos->q; + + blk_stat_remove_callback(q, rwb->cb); + blk_stat_free_callback(rwb->cb); + kfree(rwb); +} + +/* + * Disable wbt, if enabled by default. 
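 * (Note, editorial and not part of the patch: after this refactoring,
 * disabling no longer tears the structure down the way the old
 * wbt_exit() call did; it zeroes wb_normal, which makes rwb_enabled()
 * return false, and the rq_qos hook stays registered until
 * rq_qos_exit() runs at queue teardown.)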
+ */ +void wbt_disable_default(struct request_queue *q) +{ + struct rq_qos *rqos = wbt_rq_qos(q); + struct rq_wb *rwb; + if (!rqos) + return; + rwb = RQWB(rqos); + if (rwb->enable_state == WBT_STATE_ON_DEFAULT) + rwb->wb_normal = 0; +} +EXPORT_SYMBOL_GPL(wbt_disable_default); + + +static struct rq_qos_ops wbt_rqos_ops = { + .throttle = wbt_wait, + .issue = wbt_issue, + .requeue = wbt_requeue, + .done = wbt_done, + .cleanup = __wbt_done, + .exit = wbt_exit, +}; + int wbt_init(struct request_queue *q) { struct rq_wb *rwb; @@ -756,39 +728,29 @@ int wbt_init(struct request_queue *q) return -ENOMEM; } - for (i = 0; i < WBT_NUM_RWQ; i++) { - atomic_set(&rwb->rq_wait[i].inflight, 0); - init_waitqueue_head(&rwb->rq_wait[i].wait); - } + for (i = 0; i < WBT_NUM_RWQ; i++) + rq_wait_init(&rwb->rq_wait[i]); + rwb->rqos.id = RQ_QOS_WBT; + rwb->rqos.ops = &wbt_rqos_ops; + rwb->rqos.q = q; rwb->last_comp = rwb->last_issue = jiffies; - rwb->queue = q; rwb->win_nsec = RWB_WINDOW_NSEC; rwb->enable_state = WBT_STATE_ON_DEFAULT; - wbt_update_limits(rwb); + rwb->wc = 1; + rwb->rq_depth.default_depth = RWB_DEF_DEPTH; + __wbt_update_limits(rwb); /* * Assign rwb and add the stats callback. */ - q->rq_wb = rwb; + rq_qos_add(q, &rwb->rqos); blk_stat_add_callback(q, rwb->cb); rwb->min_lat_nsec = wbt_default_latency_nsec(q); - wbt_set_queue_depth(rwb, blk_queue_depth(q)); - wbt_set_write_cache(rwb, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); + wbt_set_queue_depth(q, blk_queue_depth(q)); + wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); return 0; } - -void wbt_exit(struct request_queue *q) -{ - struct rq_wb *rwb = q->rq_wb; - - if (rwb) { - blk_stat_remove_callback(q, rwb->cb); - blk_stat_free_callback(rwb->cb); - q->rq_wb = NULL; - kfree(rwb); - } -} diff --git a/block/blk-wbt.h b/block/blk-wbt.h index 300df531d0a6..53b20a58c0a2 100644 --- a/block/blk-wbt.h +++ b/block/blk-wbt.h @@ -9,6 +9,7 @@ #include #include "blk-stat.h" +#include "blk-rq-qos.h" enum wbt_flags { WBT_TRACKED = 1, /* write, tracked for throttling */ @@ -35,20 +36,12 @@ enum { WBT_STATE_ON_MANUAL = 2, }; -struct rq_wait { - wait_queue_head_t wait; - atomic_t inflight; -}; - struct rq_wb { /* * Settings that govern how we throttle */ unsigned int wb_background; /* background writeback */ unsigned int wb_normal; /* normal writeback */ - unsigned int wb_max; /* max throughput writeback */ - int scale_step; - bool scaled_max; short enable_state; /* WBT_STATE_* */ @@ -67,15 +60,20 @@ struct rq_wb { void *sync_cookie; unsigned int wc; - unsigned int queue_depth; unsigned long last_issue; /* last non-throttled issue */ unsigned long last_comp; /* last non-throttled comp */ unsigned long min_lat_nsec; - struct request_queue *queue; + struct rq_qos rqos; struct rq_wait rq_wait[WBT_NUM_RWQ]; + struct rq_depth rq_depth; }; +static inline struct rq_wb *RQWB(struct rq_qos *rqos) +{ + return container_of(rqos, struct rq_wb, rqos); +} + static inline unsigned int wbt_inflight(struct rq_wb *rwb) { unsigned int i, ret = 0; @@ -86,6 +84,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb) return ret; } + #ifdef CONFIG_BLK_WBT static inline void wbt_track(struct request *rq, enum wbt_flags flags) @@ -93,19 +92,16 @@ static inline void wbt_track(struct request *rq, enum wbt_flags flags) rq->wbt_flags |= flags; } -void __wbt_done(struct rq_wb *, enum wbt_flags); -void wbt_done(struct rq_wb *, struct request *); -enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *); int wbt_init(struct request_queue *); -void wbt_exit(struct 
request_queue *); -void wbt_update_limits(struct rq_wb *); -void wbt_requeue(struct rq_wb *, struct request *); -void wbt_issue(struct rq_wb *, struct request *); +void wbt_update_limits(struct request_queue *); void wbt_disable_default(struct request_queue *); void wbt_enable_default(struct request_queue *); -void wbt_set_queue_depth(struct rq_wb *, unsigned int); -void wbt_set_write_cache(struct rq_wb *, bool); +u64 wbt_get_min_lat(struct request_queue *q); +void wbt_set_min_lat(struct request_queue *q, u64 val); + +void wbt_set_queue_depth(struct request_queue *, unsigned int); +void wbt_set_write_cache(struct request_queue *, bool); u64 wbt_default_latency_nsec(struct request_queue *); @@ -114,43 +110,30 @@ u64 wbt_default_latency_nsec(struct request_queue *); static inline void wbt_track(struct request *rq, enum wbt_flags flags) { } -static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags) -{ -} -static inline void wbt_done(struct rq_wb *rwb, struct request *rq) -{ -} -static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, - spinlock_t *lock) -{ - return 0; -} static inline int wbt_init(struct request_queue *q) { return -EINVAL; } -static inline void wbt_exit(struct request_queue *q) -{ -} -static inline void wbt_update_limits(struct rq_wb *rwb) +static inline void wbt_update_limits(struct request_queue *q) { } -static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq) +static inline void wbt_disable_default(struct request_queue *q) { } -static inline void wbt_issue(struct rq_wb *rwb, struct request *rq) +static inline void wbt_enable_default(struct request_queue *q) { } -static inline void wbt_disable_default(struct request_queue *q) +static inline void wbt_set_queue_depth(struct request_queue *q, unsigned int depth) { } -static inline void wbt_enable_default(struct request_queue *q) +static inline void wbt_set_write_cache(struct request_queue *q, bool wc) { } -static inline void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth) +static inline u64 wbt_get_min_lat(struct request_queue *q) { + return 0; } -static inline void wbt_set_write_cache(struct rq_wb *rwb, bool wc) +static inline void wbt_set_min_lat(struct request_queue *q, u64 val) { } static inline u64 wbt_default_latency_nsec(struct request_queue *q) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9d05646d5059..137759862f07 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -42,7 +42,7 @@ struct bsg_job; struct blkcg_gq; struct blk_flush_queue; struct pr_ops; -struct rq_wb; +struct rq_qos; struct blk_queue_stats; struct blk_stat_callback; @@ -443,7 +443,7 @@ struct request_queue { int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ struct blk_queue_stats *stats; - struct rq_wb *rq_wb; + struct rq_qos *rq_qos; /* * If blkcg is not used, @q->root_rl serves all requests. If blkcg -- cgit v1.2.3 From 05814a10370b3252fe2b0898b6adac3cdd531096 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Fri, 13 Jul 2018 17:07:26 +0300 Subject: block: remove blkdev_entry_to_request() macro Remove blkdev_entry_to_request() macro, which remained unused through the observable history, also note that it repeats list_entry_rq() macro verbatim. 
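[Note: for reference only, the macro that remains in blkdev.h and that the removed one duplicated; this is the existing definition, not new code:]

#define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)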
Signed-off-by: Vladimir Zapolskiy Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 137759862f07..1939ed95f936 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1436,8 +1436,6 @@ enum blk_default_limits { BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, }; -#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) - static inline unsigned long queue_segment_boundary(struct request_queue *q) { return q->limits.seg_boundary_mask; -- cgit v1.2.3 From 3f289dcb4b265416a57ca79cf4a324060bb09060 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Jul 2018 04:47:36 -0700 Subject: block: make bdev_ops->rw_page() take a REQ_OP instead of bool c11f0c0b5bb9 ("block/mm: make bdev_ops->rw_page() take a bool for read/write") replaced @op with boolean @is_write, which limited the amount of information going into ->rw_page() and more importantly page_endio(), which removed the need to expose block internals to mm. Unfortunately, we want to track discards separately and @is_write isn't enough information. This patch updates bdev_ops->rw_page() to take REQ_OP instead but leaves page_endio() to take bool @is_write. This allows the block part of operations to have enough information while not leaking it to mm. Signed-off-by: Tejun Heo Cc: Mike Christie Cc: Minchan Kim Cc: Dan Williams Signed-off-by: Jens Axboe --- drivers/block/brd.c | 14 +++++++------- drivers/block/zram/zram_drv.c | 16 ++++++++-------- drivers/nvdimm/btt.c | 12 ++++++------ drivers/nvdimm/pmem.c | 13 ++++++------- fs/block_dev.c | 6 ++++-- fs/mpage.c | 4 ++-- include/linux/blkdev.h | 2 +- 7 files changed, 34 insertions(+), 33 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index bb976598ee43..df8103dd40ac 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -254,20 +254,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. 
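 * (Note, editorial and not part of the patch: throughout this commit
 * the bool is_write parameter becomes an unsigned int op carrying the
 * full REQ_OP_* value; op_is_write(op) recovers the old boolean where
 * that is all a driver needs, while the extra information lets discards
 * be tracked separately from plain writes, per the commit message.)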
*/ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, bool is_write, + unsigned int len, unsigned int off, unsigned int op, sector_t sector) { void *mem; int err = 0; - if (is_write) { + if (op_is_write(op)) { err = copy_to_brd_setup(brd, sector, len); if (err) goto out; } mem = kmap_atomic(page); - if (!is_write) { + if (!op_is_write(op)) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); } else { @@ -296,7 +296,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) int err; err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), sector); + bio_op(bio), sector); if (err) goto io_error; sector += len >> SECTOR_SHIFT; @@ -310,15 +310,15 @@ io_error: } static int brd_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { struct brd_device *brd = bdev->bd_disk->private_data; int err; if (PageTransHuge(page)) return -ENOTSUPP; - err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector); - page_endio(page, is_write, err); + err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector); + page_endio(page, op_is_write(op), err); return err; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7436b2d27fa3..78c29044684a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1274,17 +1274,17 @@ static void zram_bio_discard(struct zram *zram, u32 index, * Returns 1 if IO request was successfully submitted. */ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, bool is_write, struct bio *bio) + int offset, unsigned int op, struct bio *bio) { unsigned long start_time = jiffies; - int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ; + int rw_acct = op_is_write(op) ? 
REQ_OP_WRITE : REQ_OP_READ; struct request_queue *q = zram->disk->queue; int ret; generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT, &zram->disk->part0); - if (!is_write) { + if (!op_is_write(op)) { atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset, bio); flush_dcache_page(bvec->bv_page); @@ -1300,7 +1300,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, zram_slot_unlock(zram, index); if (unlikely(ret < 0)) { - if (!is_write) + if (!op_is_write(op)) atomic64_inc(&zram->stats.failed_reads); else atomic64_inc(&zram->stats.failed_writes); @@ -1338,7 +1338,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, unwritten); if (zram_bvec_rw(zram, &bv, index, offset, - op_is_write(bio_op(bio)), bio) < 0) + bio_op(bio), bio) < 0) goto out; bv.bv_offset += bv.bv_len; @@ -1390,7 +1390,7 @@ static void zram_slot_free_notify(struct block_device *bdev, } static int zram_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { int offset, ret; u32 index; @@ -1414,7 +1414,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; - ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL); + ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); out: /* * If I/O fails, just return error(ie, non-zero) without @@ -1429,7 +1429,7 @@ out: switch (ret) { case 0: - page_endio(page, is_write, 0); + page_endio(page, op_is_write(op), 0); break; case 1: ret = 0; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 85de8053aa34..0360c015f658 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1423,11 +1423,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, struct page *page, unsigned int len, unsigned int off, - bool is_write, sector_t sector) + unsigned int op, sector_t sector) { int ret; - if (!is_write) { + if (!op_is_write(op)) { ret = btt_read_pg(btt, bip, page, off, sector, len); flush_dcache_page(page); } else { @@ -1464,7 +1464,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) } err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), iter.bi_sector); + bio_op(bio), iter.bi_sector); if (err) { dev_err(&btt->nd_btt->dev, "io error in %s sector %lld, len %d,\n", @@ -1483,16 +1483,16 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) } static int btt_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { struct btt *btt = bdev->bd_disk->private_data; int rc; unsigned int len; len = hpage_nr_pages(page) * PAGE_SIZE; - rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector); + rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector); if (rc == 0) - page_endio(page, is_write, 0); + page_endio(page, op_is_write(op), 0); return rc; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 8b1fd7f1a224..dd17acd8fe68 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -120,7 +120,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, } static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, - unsigned int len, unsigned int off, bool is_write, + unsigned int len, unsigned int off, unsigned int 
op, sector_t sector) { blk_status_t rc = BLK_STS_OK; @@ -131,7 +131,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; - if (!is_write) { + if (!op_is_write(op)) { if (unlikely(bad_pmem)) rc = BLK_STS_IOERR; else { @@ -180,8 +180,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, - bvec.bv_offset, op_is_write(bio_op(bio)), - iter.bi_sector); + bvec.bv_offset, bio_op(bio), iter.bi_sector); if (rc) { bio->bi_status = rc; break; @@ -198,13 +197,13 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) } static int pmem_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { struct pmem_device *pmem = bdev->bd_queue->queuedata; blk_status_t rc; rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, - 0, is_write, sector); + 0, op, sector); /* * The ->rw_page interface is subtle and tricky. The core @@ -213,7 +212,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, * caused by double completion. */ if (rc == 0) - page_endio(page, is_write, 0); + page_endio(page, op_is_write(op), 0); return blk_status_to_errno(rc); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 0dd87aaeb39a..496fb51a1e1a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -665,7 +665,8 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, result = blk_queue_enter(bdev->bd_queue, 0); if (result) return result; - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_READ); blk_queue_exit(bdev->bd_queue); return result; } @@ -703,7 +704,8 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, return result; set_page_writeback(page); - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_WRITE); if (result) { end_page_writeback(page); } else { diff --git a/fs/mpage.c b/fs/mpage.c index b7e7f570733a..b73638db9866 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -51,8 +51,8 @@ static void mpage_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - page_endio(page, op_is_write(bio_op(bio)), - blk_status_to_errno(bio->bi_status)); + page_endio(page, bio_op(bio), + blk_status_to_errno(bio->bi_status)); } bio_put(bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1939ed95f936..331a6cb8805f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1943,7 +1943,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req, struct block_device_operations { int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); - int (*rw_page)(struct block_device *, sector_t, struct page *, bool); + int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, -- cgit v1.2.3 From 359f642700f2ff05d9c94cd9216c97af7b8e9553 Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Wed, 25 Jul 2018 10:22:58 -0400 Subject: block: move 
bio_integrity_{intervals,bytes} into blkdev.h This allows bio_integrity_bytes() to be called from drivers instead of open coding it. Acked-by: Martin K. Petersen Signed-off-by: Greg Edwards Signed-off-by: Jens Axboe --- block/bio-integrity.c | 22 ---------------------- include/linux/blkdev.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 22 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index add7c7c85335..67b5fb861a51 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -159,28 +159,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL(bio_integrity_add_page); -/** - * bio_integrity_intervals - Return number of integrity intervals for a bio - * @bi: blk_integrity profile for device - * @sectors: Size of the bio in 512-byte sectors - * - * Description: The block layer calculates everything in 512 byte - * sectors but integrity metadata is done in terms of the data integrity - * interval size of the storage device. Convert the block layer sectors - * to the appropriate number of integrity intervals. - */ -static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, - unsigned int sectors) -{ - return sectors >> (bi->interval_exp - 9); -} - -static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, - unsigned int sectors) -{ - return bio_integrity_intervals(bi, sectors) * bi->tuple_size; -} - /** * bio_integrity_process - Process integrity metadata for a bio * @bio: bio to generate/verify integrity metadata for diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 331a6cb8805f..050d599f5ea9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1865,6 +1865,28 @@ static inline bool integrity_req_gap_front_merge(struct request *req, bip_next->bip_vec[0].bv_offset); } +/** + * bio_integrity_intervals - Return number of integrity intervals for a bio + * @bi: blk_integrity profile for device + * @sectors: Size of the bio in 512-byte sectors + * + * Description: The block layer calculates everything in 512 byte + * sectors but integrity metadata is done in terms of the data integrity + * interval size of the storage device. Convert the block layer sectors + * to the appropriate number of integrity intervals. + */ +static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, + unsigned int sectors) +{ + return sectors >> (bi->interval_exp - 9); +} + +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return bio_integrity_intervals(bi, sectors) * bi->tuple_size; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ struct bio; @@ -1938,6 +1960,18 @@ static inline bool integrity_req_gap_front_merge(struct request *req, return false; } +static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, + unsigned int sectors) +{ + return 0; +} + +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return 0; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ struct block_device_operations { -- cgit v1.2.3 From b1f4267cc5448d20ae0c515a74141e74365e78a3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Aug 2018 07:47:28 -0700 Subject: block: Remove two superfluous #include directives Commit 12f5b9314545 ("blk-mq: Remove generation seqeunce") removed the only seqcount_t and u64_stats_sync instances from but did not remove the corresponding #include directives. 
Since these include directives are no longer needed, remove them. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Keith Busch Cc: Ming Lei Cc: Jianchao Wang Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 050d599f5ea9..d6869e0e2b64 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -27,8 +27,6 @@ #include #include #include -#include <linux/seqlock.h> -#include <linux/u64_stats_sync.h> struct module; struct scsi_ioctl_command; -- cgit v1.2.3