From 6b1d83d274486615cc341e410467a98fd9c27c0a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jun 2018 14:55:19 -0700 Subject: block: Remove bdev_nr_zones() Remove this function since it has no callers. This function was introduced in commit 6cc77e9cb080 ("block: introduce zoned block devices zone write locking"). Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Cc: Christoph Hellwig Cc: Matias Bjorling Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 79226ca8f80f..49a400afb146 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1639,15 +1639,6 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev) return 0; } -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return blk_queue_nr_zones(q); - return 0; -} - static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; -- cgit v1.2.3 From 7c8542b7982264226cf94102950343185869b584 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jun 2018 14:55:20 -0700 Subject: block: Inline blk_queue_nr_zones() Since the implementation of blk_queue_nr_zones() is trivial and since it only has a single caller, inline this function. Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Cc: Matias Bjorling Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 2 +- include/linux/blkdev.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 1c4532e92938..26e1f8e425a8 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -214,7 +214,7 @@ static int queue_zone_wlock_show(void *data, struct seq_file *m) if (!q->seq_zones_wlock) return 0; - for (i = 0; i < blk_queue_nr_zones(q); i++) + for (i = 0; i < q->nr_zones; i++) if (test_bit(i, q->seq_zones_wlock)) seq_printf(m, "%u\n", i); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 49a400afb146..905daa7c647e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -800,11 +800,6 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; } -static inline unsigned int blk_queue_nr_zones(struct request_queue *q) -{ - return q->nr_zones; -} - static inline unsigned int blk_queue_zone_no(struct request_queue *q, sector_t sector) { -- cgit v1.2.3 From 6a5ac9846508ad7d1d23881d9d5add35f2e6ae71 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jun 2018 14:55:21 -0700 Subject: block: Make struct request_queue smaller for CONFIG_BLK_DEV_ZONED=n Exclude zoned block device members from struct request_queue for CONFIG_BLK_DEV_ZONED == n. Avoid breaking the build by only building the code that uses these struct request_queue members if CONFIG_BLK_DEV_ZONED != n. 
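[Note: a minimal sketch, not part of the patch, of the pattern this change relies on. my_report_nr_zones() is a hypothetical caller; once the zoned members are under #ifdef CONFIG_BLK_DEV_ZONED, anything that touches them must itself be compiled out or stubbed, which is what the hunks below do for the debugfs code and the blkdev.h helpers.]

#ifdef CONFIG_BLK_DEV_ZONED
static unsigned int my_report_nr_zones(struct request_queue *q)
{
	return q->nr_zones;	/* member exists only in this configuration */
}
#else
static unsigned int my_report_nr_zones(struct request_queue *q)
{
	return 0;		/* stub: no zoned members to read */
}
#endif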
Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Cc: Matias Bjorling Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/Kconfig | 4 ++++ block/Makefile | 1 + block/blk-mq-debugfs-zoned.c | 24 ++++++++++++++++++++++++ block/blk-mq-debugfs.c | 15 --------------- block/blk-mq-debugfs.h | 9 +++++++++ include/linux/blkdev.h | 6 ++++++ 6 files changed, 44 insertions(+), 15 deletions(-) create mode 100644 block/blk-mq-debugfs-zoned.c (limited to 'include/linux/blkdev.h') diff --git a/block/Kconfig b/block/Kconfig index eb50fd4977c2..dfe7bc770fc9 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -177,6 +177,10 @@ config BLK_DEBUG_FS Unless you are building a kernel for a tiny system, you should say Y here. +config BLK_DEBUG_FS_ZONED + bool + default BLK_DEBUG_FS && BLK_DEV_ZONED + config BLK_SED_OPAL bool "Logic for interfacing with Opal enabled SEDs" ---help--- diff --git a/block/Makefile b/block/Makefile index 6a56303b9925..a8f94cdb75c3 100644 --- a/block/Makefile +++ b/block/Makefile @@ -34,4 +34,5 @@ obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o +obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o diff --git a/block/blk-mq-debugfs-zoned.c b/block/blk-mq-debugfs-zoned.c new file mode 100644 index 000000000000..fb2c82c351e4 --- /dev/null +++ b/block/blk-mq-debugfs-zoned.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 Western Digital Corporation or its affiliates. + * + * This file is released under the GPL. + */ + +#include +#include "blk-mq-debugfs.h" + +int queue_zone_wlock_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + unsigned int i; + + if (!q->seq_zones_wlock) + return 0; + + for (i = 0; i < q->nr_zones; i++) + if (test_bit(i, q->seq_zones_wlock)) + seq_printf(m, "%u\n", i); + + return 0; +} diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 26e1f8e425a8..7efe268e4447 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -206,21 +206,6 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, return count; } -static int queue_zone_wlock_show(void *data, struct seq_file *m) -{ - struct request_queue *q = data; - unsigned int i; - - if (!q->seq_zones_wlock) - return 0; - - for (i = 0; i < q->nr_zones; i++) - if (test_bit(i, q->seq_zones_wlock)) - seq_printf(m, "%u\n", i); - - return 0; -} - static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { { "poll_stat", 0400, queue_poll_stat_show }, { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index b9d366e57097..a9160be12be0 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -80,4 +80,13 @@ static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hc } #endif +#ifdef CONFIG_BLK_DEBUG_FS_ZONED +int queue_zone_wlock_show(void *data, struct seq_file *m); +#else +static inline int queue_zone_wlock_show(void *data, struct seq_file *m) +{ + return 0; +} +#endif + #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 905daa7c647e..ca5a8b046894 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -592,6 +592,7 @@ struct request_queue { struct queue_limits limits; +#ifdef CONFIG_BLK_DEV_ZONED /* * Zoned block device information for request dispatch control. 
* nr_zones is the total number of zones of the device. This is always @@ -612,6 +613,7 @@ struct request_queue { unsigned int nr_zones; unsigned long *seq_zones_bitmap; unsigned long *seq_zones_wlock; +#endif /* CONFIG_BLK_DEV_ZONED */ /* * sg stuff @@ -800,6 +802,7 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; } +#ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_queue_zone_no(struct request_queue *q, sector_t sector) { @@ -815,6 +818,7 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q, return false; return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap); } +#endif /* CONFIG_BLK_DEV_ZONED */ static inline bool rq_is_sync(struct request *rq) { @@ -1065,6 +1069,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; } +#ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_rq_zone_no(struct request *rq) { return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); @@ -1074,6 +1079,7 @@ static inline unsigned int blk_rq_zone_is_seq(struct request *rq) { return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); } +#endif /* CONFIG_BLK_DEV_ZONED */ /* * Some commands like WRITE SAME have a payload or data transfer size which -- cgit v1.2.3 From 97889f9ac24f8d2fc8e703ea7f80c162bab10d4d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Jun 2018 19:31:48 +0800 Subject: blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set() We have to remove synchronize_rcu() from blk_queue_cleanup(), otherwise long delay can be caused during lun probe. For removing it, we have to avoid to iterate the set->tag_list in IO path, eg, blk_mq_sched_restart(). This patch reverts 5b79413946d (Revert "blk-mq: don't handle TAG_SHARED in restart"). Given we have fixed enough IO hang issue, and there isn't any reason to restart all queues in one tags any more, see the following reasons: 1) blk-mq core can deal with shared-tags case well via blk_mq_get_driver_tag(), which can wake up queues waiting for driver tag. 2) SCSI is a bit special because it may return BLK_STS_RESOURCE if queue, target or host is ready, but SCSI built-in restart can cover all these well, see scsi_end_request(), queue will be rerun after any request initiated from this host/target is completed. In my test on scsi_debug(8 luns), this patch may improve IOPS by 20% ~ 30% when running I/O on these 8 luns concurrently. Fixes: 705cda97ee3a ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list") Cc: Omar Sandoval Cc: Bart Van Assche Cc: Christoph Hellwig Cc: Martin K. 
Petersen Cc: linux-scsi@vger.kernel.org Reported-by: Andrew Jones Tested-by: Andrew Jones Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 85 +++----------------------------------------------- block/blk-mq.c | 10 ++---- include/linux/blkdev.h | 2 -- 3 files changed, 7 insertions(+), 90 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 56c493c6cd90..4e027f6108ae 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -59,29 +59,16 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) return; - if (hctx->flags & BLK_MQ_F_TAG_SHARED) { - struct request_queue *q = hctx->queue; - - if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - atomic_inc(&q->shared_hctx_restart); - } else - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); } -static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) +void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) { if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - return false; - - if (hctx->flags & BLK_MQ_F_TAG_SHARED) { - struct request_queue *q = hctx->queue; - - if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - atomic_dec(&q->shared_hctx_restart); - } else - clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + return; + clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - return blk_mq_run_hw_queue(hctx, true); + blk_mq_run_hw_queue(hctx, true); } /* @@ -380,68 +367,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, return false; } -/** - * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list - * @pos: loop cursor. - * @skip: the list element that will not be examined. Iteration starts at - * @skip->next. - * @head: head of the list to examine. This list must have at least one - * element, namely @skip. - * @member: name of the list_head structure within typeof(*pos). - */ -#define list_for_each_entry_rcu_rr(pos, skip, head, member) \ - for ((pos) = (skip); \ - (pos = (pos)->member.next != (head) ? list_entry_rcu( \ - (pos)->member.next, typeof(*pos), member) : \ - list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \ - (pos) != (skip); ) - -/* - * Called after a driver tag has been freed to check whether a hctx needs to - * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware - * queues in a round-robin fashion if the tag set of @hctx is shared with other - * hardware queues. - */ -void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) -{ - struct blk_mq_tags *const tags = hctx->tags; - struct blk_mq_tag_set *const set = hctx->queue->tag_set; - struct request_queue *const queue = hctx->queue, *q; - struct blk_mq_hw_ctx *hctx2; - unsigned int i, j; - - if (set->flags & BLK_MQ_F_TAG_SHARED) { - /* - * If this is 0, then we know that no hardware queues - * have RESTART marked. We're done. 
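 * (Note, editorial and not part of the patch: this whole round-robin
 * restart path is being deleted. After this commit each hctx is
 * restarted directly in blk_mq_sched_restart(); fairness for shared
 * tags is left to the wakeups done by blk_mq_get_driver_tag() and, for
 * SCSI, to the built-in restart in scsi_end_request(), as the commit
 * message above explains.)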
- */ - if (!atomic_read(&queue->shared_hctx_restart)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu_rr(q, queue, &set->tag_list, - tag_set_list) { - queue_for_each_hw_ctx(q, hctx2, i) - if (hctx2->tags == tags && - blk_mq_sched_restart_hctx(hctx2)) - goto done; - } - j = hctx->queue_num + 1; - for (i = 0; i < queue->nr_hw_queues; i++, j++) { - if (j == queue->nr_hw_queues) - j = 0; - hctx2 = queue->queue_hw_ctx[j]; - if (hctx2->tags == tags && - blk_mq_sched_restart_hctx(hctx2)) - break; - } -done: - rcu_read_unlock(); - } else { - blk_mq_sched_restart_hctx(hctx); - } -} - void blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue, bool async) { diff --git a/block/blk-mq.c b/block/blk-mq.c index df84281f6af6..7c6ff13171ef 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2335,15 +2335,10 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) int i; queue_for_each_hw_ctx(q, hctx, i) { - if (shared) { - if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - atomic_inc(&q->shared_hctx_restart); + if (shared) hctx->flags |= BLK_MQ_F_TAG_SHARED; - } else { - if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - atomic_dec(&q->shared_hctx_restart); + else hctx->flags &= ~BLK_MQ_F_TAG_SHARED; - } } } @@ -2374,7 +2369,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) blk_mq_update_tag_set_depth(set, false); } mutex_unlock(&set->tag_list_lock); - synchronize_rcu(); INIT_LIST_HEAD(&q->tag_set_list); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ca5a8b046894..9d05646d5059 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -442,8 +442,6 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ - atomic_t shared_hctx_restart; - struct blk_queue_stats *stats; struct rq_wb *rq_wb; -- cgit v1.2.3 From a79050434b45959f397042080fd1d70ffa9bd9df Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Jul 2018 09:32:35 -0600 Subject: blk-rq-qos: refactor out common elements of blk-wbt blkcg-qos is going to do essentially what wbt does, only on a cgroup basis. Break out the common code that will be shared between blkcg-qos and wbt into blk-rq-qos.* so they can both utilize the same infrastructure. 
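[Note: a minimal sketch, not part of the patch, of how a policy hooks into the rq_qos chain this refactoring introduces. The policy itself (struct my_qos and the my_qos_* functions) is hypothetical; struct rq_qos, struct rq_qos_ops, rq_qos_add() and the RQ_QOS_CGROUP id are the ones added in blk-rq-qos.h below. On teardown, rq_qos_exit() walks the chain and invokes each policy's ->exit().]

#include <linux/slab.h>
#include "blk-rq-qos.h"

struct my_qos {
	struct rq_qos rqos;	/* embedded so container_of() can recover us */
	atomic_t completed;	/* hypothetical per-queue statistic */
};

static void my_qos_done(struct rq_qos *rqos, struct request *rq)
{
	struct my_qos *mq = container_of(rqos, struct my_qos, rqos);

	atomic_inc(&mq->completed);	/* runs via rq_qos_done() on completion */
}

static void my_qos_exit(struct rq_qos *rqos)
{
	kfree(container_of(rqos, struct my_qos, rqos));
}

static struct rq_qos_ops my_qos_ops = {
	.done	= my_qos_done,
	.exit	= my_qos_exit,
};

int my_qos_init(struct request_queue *q)
{
	struct my_qos *mq = kzalloc(sizeof(*mq), GFP_KERNEL);

	if (!mq)
		return -ENOMEM;
	mq->rqos.id = RQ_QOS_CGROUP;	/* hypothetical: reuses an existing id */
	mq->rqos.ops = &my_qos_ops;
	mq->rqos.q = q;
	rq_qos_add(q, &mq->rqos);	/* push onto the q->rq_qos list */
	return 0;
}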
Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- block/Makefile | 2 +- block/blk-core.c | 12 +- block/blk-mq.c | 12 +- block/blk-rq-qos.c | 178 +++++++++++++++++++++++++++ block/blk-rq-qos.h | 106 ++++++++++++++++ block/blk-settings.c | 4 +- block/blk-sysfs.c | 22 ++-- block/blk-wbt.c | 326 ++++++++++++++++++++++--------------------------- block/blk-wbt.h | 63 ++++------ include/linux/blkdev.h | 4 +- 10 files changed, 478 insertions(+), 251 deletions(-) create mode 100644 block/blk-rq-qos.c create mode 100644 block/blk-rq-qos.h (limited to 'include/linux/blkdev.h') diff --git a/block/Makefile b/block/Makefile index a8f94cdb75c3..57d0f47ab05f 100644 --- a/block/Makefile +++ b/block/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ genhd.o partition-generic.o ioprio.o \ - badblocks.o partitions/ + badblocks.o partitions/ blk-rq-qos.o obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o diff --git a/block/blk-core.c b/block/blk-core.c index 2ff8e131a892..b33a73bcf2d0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1645,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) blk_delete_timer(rq); blk_clear_rq_complete(rq); trace_block_rq_requeue(q, rq); - wbt_requeue(q->rq_wb, rq); + rq_qos_requeue(q, rq); if (rq->rq_flags & RQF_QUEUED) blk_queue_end_tag(q, rq); @@ -1752,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) /* this is a bio leak */ WARN_ON(req->bio != NULL); - wbt_done(q->rq_wb, req); + rq_qos_done(q, req); /* * Request may not have originated from ll_rw_blk. if not, @@ -2044,7 +2044,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) } get_rq: - wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock); + wb_acct = rq_qos_throttle(q, bio, q->queue_lock); /* * Grab a free request. This is might sleep but can not fail. 
@@ -2054,7 +2054,7 @@ get_rq: req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO); if (IS_ERR(req)) { blk_queue_exit(q); - __wbt_done(q->rq_wb, wb_acct); + rq_qos_cleanup(q, wb_acct); if (PTR_ERR(req) == -ENOMEM) bio->bi_status = BLK_STS_RESOURCE; else @@ -2983,7 +2983,7 @@ void blk_start_request(struct request *req) req->throtl_size = blk_rq_sectors(req); #endif req->rq_flags |= RQF_STATS; - wbt_issue(req->q->rq_wb, req); + rq_qos_issue(req->q, req); } BUG_ON(blk_rq_is_complete(req)); @@ -3207,7 +3207,7 @@ void blk_finish_request(struct request *req, blk_status_t error) blk_account_io_done(req, now); if (req->end_io) { - wbt_done(req->q->rq_wb, req); + rq_qos_done(q, req); req->end_io(req, error); } else { if (blk_bidi_rq(req)) diff --git a/block/blk-mq.c b/block/blk-mq.c index 850fdd02c385..ea2a226457fa 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -504,7 +504,7 @@ void blk_mq_free_request(struct request *rq) if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->backing_dev_info); - wbt_done(q->rq_wb, rq); + rq_qos_done(q, rq); if (blk_rq_rl(rq)) blk_put_rl(blk_rq_rl(rq)); @@ -527,7 +527,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) blk_account_io_done(rq, now); if (rq->end_io) { - wbt_done(rq->q->rq_wb, rq); + rq_qos_done(rq->q, rq); rq->end_io(rq, error); } else { if (unlikely(blk_bidi_rq(rq))) @@ -641,7 +641,7 @@ void blk_mq_start_request(struct request *rq) rq->throtl_size = blk_rq_sectors(rq); #endif rq->rq_flags |= RQF_STATS; - wbt_issue(q->rq_wb, rq); + rq_qos_issue(q, rq); } WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE); @@ -667,7 +667,7 @@ static void __blk_mq_requeue_request(struct request *rq) blk_mq_put_driver_tag(rq); trace_block_rq_requeue(q, rq); - wbt_requeue(q->rq_wb, rq); + rq_qos_requeue(q, rq); if (blk_mq_request_started(rq)) { WRITE_ONCE(rq->state, MQ_RQ_IDLE); @@ -1806,13 +1806,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (blk_mq_sched_bio_merge(q, bio)) return BLK_QC_T_NONE; - wb_acct = wbt_wait(q->rq_wb, bio, NULL); + wb_acct = rq_qos_throttle(q, bio, NULL); trace_block_getrq(q, bio, bio->bi_opf); rq = blk_mq_get_request(q, bio, bio->bi_opf, &data); if (unlikely(!rq)) { - __wbt_done(q->rq_wb, wb_acct); + rq_qos_cleanup(q, wb_acct); if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); return BLK_QC_T_NONE; diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c new file mode 100644 index 000000000000..d2f2af8aa10c --- /dev/null +++ b/block/blk-rq-qos.c @@ -0,0 +1,178 @@ +#include "blk-rq-qos.h" + +#include "blk-wbt.h" + +/* + * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, + * false if 'v' + 1 would be bigger than 'below'. 
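 * (Note, editorial and not part of the patch: this helper is a
 * lock-free compare-and-swap loop; it re-reads the counter and retries
 * atomic_cmpxchg() until either the increment lands or the limit is
 * reached. rq_wait_inc_below() below wraps it to cap the number of
 * in-flight requests against a scaled depth limit.)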
+ */ +static bool atomic_inc_below(atomic_t *v, int below) +{ + int cur = atomic_read(v); + + for (;;) { + int old; + + if (cur >= below) + return false; + old = atomic_cmpxchg(v, cur, cur + 1); + if (old == cur) + break; + cur = old; + } + + return true; +} + +bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit) +{ + return atomic_inc_below(&rq_wait->inflight, limit); +} + +void rq_qos_cleanup(struct request_queue *q, enum wbt_flags wb_acct) +{ + struct rq_qos *rqos; + + for (rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->ops->cleanup) + rqos->ops->cleanup(rqos, wb_acct); + } +} + +void rq_qos_done(struct request_queue *q, struct request *rq) +{ + struct rq_qos *rqos; + + for (rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->ops->done) + rqos->ops->done(rqos, rq); + } +} + +void rq_qos_issue(struct request_queue *q, struct request *rq) +{ + struct rq_qos *rqos; + + for(rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->ops->issue) + rqos->ops->issue(rqos, rq); + } +} + +void rq_qos_requeue(struct request_queue *q, struct request *rq) +{ + struct rq_qos *rqos; + + for(rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->ops->requeue) + rqos->ops->requeue(rqos, rq); + } +} + +enum wbt_flags rq_qos_throttle(struct request_queue *q, struct bio *bio, + spinlock_t *lock) +{ + struct rq_qos *rqos; + enum wbt_flags flags = 0; + + for(rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->ops->throttle) + flags |= rqos->ops->throttle(rqos, bio, lock); + } + return flags; +} + +/* + * Return true, if we can't increase the depth further by scaling + */ +bool rq_depth_calc_max_depth(struct rq_depth *rqd) +{ + unsigned int depth; + bool ret = false; + + /* + * For QD=1 devices, this is a special case. It's important for those + * to have one request ready when one completes, so force a depth of + * 2 for those devices. On the backend, it'll be a depth of 1 anyway, + * since the device can't have more than that in flight. If we're + * scaling down, then keep a setting of 1/1/1. + */ + if (rqd->queue_depth == 1) { + if (rqd->scale_step > 0) + rqd->max_depth = 1; + else { + rqd->max_depth = 2; + ret = true; + } + } else { + /* + * scale_step == 0 is our default state. If we have suffered + * latency spikes, step will be > 0, and we shrink the + * allowed write depths. If step is < 0, we're only doing + * writes, and we allow a temporarily higher depth to + * increase performance. + */ + depth = min_t(unsigned int, rqd->default_depth, + rqd->queue_depth); + if (rqd->scale_step > 0) + depth = 1 + ((depth - 1) >> min(31, rqd->scale_step)); + else if (rqd->scale_step < 0) { + unsigned int maxd = 3 * rqd->queue_depth / 4; + + depth = 1 + ((depth - 1) << -rqd->scale_step); + if (depth > maxd) { + depth = maxd; + ret = true; + } + } + + rqd->max_depth = depth; + } + + return ret; +} + +void rq_depth_scale_up(struct rq_depth *rqd) +{ + /* + * Hit max in previous round, stop here + */ + if (rqd->scaled_max) + return; + + rqd->scale_step--; + + rqd->scaled_max = rq_depth_calc_max_depth(rqd); +} + +/* + * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we + * had a latency violation. + */ +void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) +{ + /* + * Stop scaling down when we've hit the limit. This also prevents + * ->scale_step from going to crazy values, if the device can't + * keep up. 
+ */ + if (rqd->max_depth == 1) + return; + + if (rqd->scale_step < 0 && hard_throttle) + rqd->scale_step = 0; + else + rqd->scale_step++; + + rqd->scaled_max = false; + rq_depth_calc_max_depth(rqd); +} + +void rq_qos_exit(struct request_queue *q) +{ + while (q->rq_qos) { + struct rq_qos *rqos = q->rq_qos; + q->rq_qos = rqos->next; + rqos->ops->exit(rqos); + } +} diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h new file mode 100644 index 000000000000..f9a39bd6ece3 --- /dev/null +++ b/block/blk-rq-qos.h @@ -0,0 +1,106 @@ +#ifndef RQ_QOS_H +#define RQ_QOS_H + +#include +#include +#include +#include +#include + +enum rq_qos_id { + RQ_QOS_WBT, + RQ_QOS_CGROUP, +}; + +struct rq_wait { + wait_queue_head_t wait; + atomic_t inflight; +}; + +struct rq_qos { + struct rq_qos_ops *ops; + struct request_queue *q; + enum rq_qos_id id; + struct rq_qos *next; +}; + +struct rq_qos_ops { + enum wbt_flags (*throttle)(struct rq_qos *, struct bio *, + spinlock_t *); + void (*issue)(struct rq_qos *, struct request *); + void (*requeue)(struct rq_qos *, struct request *); + void (*done)(struct rq_qos *, struct request *); + void (*cleanup)(struct rq_qos *, enum wbt_flags); + void (*exit)(struct rq_qos *); +}; + +struct rq_depth { + unsigned int max_depth; + + int scale_step; + bool scaled_max; + + unsigned int queue_depth; + unsigned int default_depth; +}; + +static inline struct rq_qos *rq_qos_id(struct request_queue *q, + enum rq_qos_id id) +{ + struct rq_qos *rqos; + for (rqos = q->rq_qos; rqos; rqos = rqos->next) { + if (rqos->id == id) + break; + } + return rqos; +} + +static inline struct rq_qos *wbt_rq_qos(struct request_queue *q) +{ + return rq_qos_id(q, RQ_QOS_WBT); +} + +static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q) +{ + return rq_qos_id(q, RQ_QOS_CGROUP); +} + +static inline void rq_wait_init(struct rq_wait *rq_wait) +{ + atomic_set(&rq_wait->inflight, 0); + init_waitqueue_head(&rq_wait->wait); +} + +static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) +{ + rqos->next = q->rq_qos; + q->rq_qos = rqos; +} + +static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) +{ + struct rq_qos *cur, *prev = NULL; + for (cur = q->rq_qos; cur; cur = cur->next) { + if (cur == rqos) { + if (prev) + prev->next = rqos->next; + else + q->rq_qos = cur; + break; + } + prev = cur; + } +} + +bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit); +void rq_depth_scale_up(struct rq_depth *rqd); +void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle); +bool rq_depth_calc_max_depth(struct rq_depth *rqd); + +void rq_qos_cleanup(struct request_queue *, enum wbt_flags); +void rq_qos_done(struct request_queue *, struct request *); +void rq_qos_issue(struct request_queue *, struct request *); +void rq_qos_requeue(struct request_queue *, struct request *); +enum wbt_flags rq_qos_throttle(struct request_queue *, struct bio *, spinlock_t *); +void rq_qos_exit(struct request_queue *); +#endif diff --git a/block/blk-settings.c b/block/blk-settings.c index d1de71124656..053de87d1fda 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -875,7 +875,7 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); void blk_set_queue_depth(struct request_queue *q, unsigned int depth) { q->queue_depth = depth; - wbt_set_queue_depth(q->rq_wb, depth); + wbt_set_queue_depth(q, depth); } EXPORT_SYMBOL(blk_set_queue_depth); @@ -900,7 +900,7 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) queue_flag_clear(QUEUE_FLAG_FUA, q); 
spin_unlock_irq(q->queue_lock); - wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); + wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); } EXPORT_SYMBOL_GPL(blk_queue_write_cache); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 94987b1f69e1..49c29a5d06bb 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -422,16 +422,16 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page, static ssize_t queue_wb_lat_show(struct request_queue *q, char *page) { - if (!q->rq_wb) + if (!wbt_rq_qos(q)) return -EINVAL; - return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000)); + return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000)); } static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, size_t count) { - struct rq_wb *rwb; + struct rq_qos *rqos; ssize_t ret; s64 val; @@ -441,23 +441,21 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, if (val < -1) return -EINVAL; - rwb = q->rq_wb; - if (!rwb) { + rqos = wbt_rq_qos(q); + if (!rqos) { ret = wbt_init(q); if (ret) return ret; } - rwb = q->rq_wb; if (val == -1) - rwb->min_lat_nsec = wbt_default_latency_nsec(q); + val = wbt_default_latency_nsec(q); else if (val >= 0) - rwb->min_lat_nsec = val * 1000ULL; + val *= 1000ULL; - if (rwb->enable_state == WBT_STATE_ON_DEFAULT) - rwb->enable_state = WBT_STATE_ON_MANUAL; + wbt_set_min_lat(q, val); - wbt_update_limits(rwb); + wbt_update_limits(q); return count; } @@ -964,7 +962,7 @@ void blk_unregister_queue(struct gendisk *disk) kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); - wbt_exit(q); + rq_qos_exit(q); mutex_lock(&q->sysfs_lock); if (q->request_fn || (q->mq_ops && q->elevator)) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 4f89b28fa652..6fe20fb823e4 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -25,6 +25,7 @@ #include #include "blk-wbt.h" +#include "blk-rq-qos.h" #define CREATE_TRACE_POINTS #include @@ -78,28 +79,6 @@ static inline bool rwb_enabled(struct rq_wb *rwb) return rwb && rwb->wb_normal != 0; } -/* - * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, - * false if 'v' + 1 would be bigger than 'below'. - */ -static bool atomic_inc_below(atomic_t *v, int below) -{ - int cur = atomic_read(v); - - for (;;) { - int old; - - if (cur >= below) - return false; - old = atomic_cmpxchg(v, cur, cur + 1); - if (old == cur) - break; - cur = old; - } - - return true; -} - static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) { if (rwb_enabled(rwb)) { @@ -116,7 +95,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) */ static bool wb_recent_wait(struct rq_wb *rwb) { - struct bdi_writeback *wb = &rwb->queue->backing_dev_info->wb; + struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb; return time_before(jiffies, wb->dirty_sleep + HZ); } @@ -144,8 +123,9 @@ static void rwb_wake_all(struct rq_wb *rwb) } } -void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct) +static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) { + struct rq_wb *rwb = RQWB(rqos); struct rq_wait *rqw; int inflight, limit; @@ -194,10 +174,9 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct) * Called on completion of a request. Note that it's also called when * a request is merged, when the request gets freed. 
*/ -void wbt_done(struct rq_wb *rwb, struct request *rq) +static void wbt_done(struct rq_qos *rqos, struct request *rq) { - if (!rwb) - return; + struct rq_wb *rwb = RQWB(rqos); if (!wbt_is_tracked(rq)) { if (rwb->sync_cookie == rq) { @@ -209,72 +188,11 @@ void wbt_done(struct rq_wb *rwb, struct request *rq) wb_timestamp(rwb, &rwb->last_comp); } else { WARN_ON_ONCE(rq == rwb->sync_cookie); - __wbt_done(rwb, wbt_flags(rq)); + __wbt_done(rqos, wbt_flags(rq)); } wbt_clear_state(rq); } -/* - * Return true, if we can't increase the depth further by scaling - */ -static bool calc_wb_limits(struct rq_wb *rwb) -{ - unsigned int depth; - bool ret = false; - - if (!rwb->min_lat_nsec) { - rwb->wb_max = rwb->wb_normal = rwb->wb_background = 0; - return false; - } - - /* - * For QD=1 devices, this is a special case. It's important for those - * to have one request ready when one completes, so force a depth of - * 2 for those devices. On the backend, it'll be a depth of 1 anyway, - * since the device can't have more than that in flight. If we're - * scaling down, then keep a setting of 1/1/1. - */ - if (rwb->queue_depth == 1) { - if (rwb->scale_step > 0) - rwb->wb_max = rwb->wb_normal = 1; - else { - rwb->wb_max = rwb->wb_normal = 2; - ret = true; - } - rwb->wb_background = 1; - } else { - /* - * scale_step == 0 is our default state. If we have suffered - * latency spikes, step will be > 0, and we shrink the - * allowed write depths. If step is < 0, we're only doing - * writes, and we allow a temporarily higher depth to - * increase performance. - */ - depth = min_t(unsigned int, RWB_DEF_DEPTH, rwb->queue_depth); - if (rwb->scale_step > 0) - depth = 1 + ((depth - 1) >> min(31, rwb->scale_step)); - else if (rwb->scale_step < 0) { - unsigned int maxd = 3 * rwb->queue_depth / 4; - - depth = 1 + ((depth - 1) << -rwb->scale_step); - if (depth > maxd) { - depth = maxd; - ret = true; - } - } - - /* - * Set our max/normal/bg queue depths based on how far - * we have scaled down (->scale_step). 
- */ - rwb->wb_max = depth; - rwb->wb_normal = (rwb->wb_max + 1) / 2; - rwb->wb_background = (rwb->wb_max + 3) / 4; - } - - return ret; -} - static inline bool stat_sample_valid(struct blk_rq_stat *stat) { /* @@ -307,7 +225,8 @@ enum { static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) { - struct backing_dev_info *bdi = rwb->queue->backing_dev_info; + struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info; + struct rq_depth *rqd = &rwb->rq_depth; u64 thislat; /* @@ -351,7 +270,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) return LAT_EXCEEDED; } - if (rwb->scale_step) + if (rqd->scale_step) trace_wbt_stat(bdi, stat); return LAT_OK; @@ -359,58 +278,48 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) static void rwb_trace_step(struct rq_wb *rwb, const char *msg) { - struct backing_dev_info *bdi = rwb->queue->backing_dev_info; + struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info; + struct rq_depth *rqd = &rwb->rq_depth; - trace_wbt_step(bdi, msg, rwb->scale_step, rwb->cur_win_nsec, - rwb->wb_background, rwb->wb_normal, rwb->wb_max); + trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec, + rwb->wb_background, rwb->wb_normal, rqd->max_depth); } -static void scale_up(struct rq_wb *rwb) +static void calc_wb_limits(struct rq_wb *rwb) { - /* - * Hit max in previous round, stop here - */ - if (rwb->scaled_max) - return; + if (rwb->min_lat_nsec == 0) { + rwb->wb_normal = rwb->wb_background = 0; + } else if (rwb->rq_depth.max_depth <= 2) { + rwb->wb_normal = rwb->rq_depth.max_depth; + rwb->wb_background = 1; + } else { + rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2; + rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4; + } +} - rwb->scale_step--; +static void scale_up(struct rq_wb *rwb) +{ + rq_depth_scale_up(&rwb->rq_depth); + calc_wb_limits(rwb); rwb->unknown_cnt = 0; - - rwb->scaled_max = calc_wb_limits(rwb); - - rwb_wake_all(rwb); - - rwb_trace_step(rwb, "step up"); + rwb_trace_step(rwb, "scale up"); } -/* - * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we - * had a latency violation. - */ static void scale_down(struct rq_wb *rwb, bool hard_throttle) { - /* - * Stop scaling down when we've hit the limit. This also prevents - * ->scale_step from going to crazy values, if the device can't - * keep up. - */ - if (rwb->wb_max == 1) - return; - - if (rwb->scale_step < 0 && hard_throttle) - rwb->scale_step = 0; - else - rwb->scale_step++; - - rwb->scaled_max = false; - rwb->unknown_cnt = 0; + rq_depth_scale_down(&rwb->rq_depth, hard_throttle); calc_wb_limits(rwb); - rwb_trace_step(rwb, "step down"); + rwb->unknown_cnt = 0; + rwb_wake_all(rwb); + rwb_trace_step(rwb, "scale down"); } static void rwb_arm_timer(struct rq_wb *rwb) { - if (rwb->scale_step > 0) { + struct rq_depth *rqd = &rwb->rq_depth; + + if (rqd->scale_step > 0) { /* * We should speed this up, using some variant of a fast * integer inverse square root calculation. Since we only do @@ -418,7 +327,7 @@ static void rwb_arm_timer(struct rq_wb *rwb) * though. 
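 * (Note, editorial and not part of the patch: the expression below is
 * win_nsec / sqrt(scale_step + 1) in fixed point, since win_nsec << 4
 * multiplies by 16 and int_sqrt((scale_step + 1) << 8) evaluates to
 * 16 * sqrt(scale_step + 1); deeper throttling therefore samples with
 * a shorter window.)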
*/ rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4, - int_sqrt((rwb->scale_step + 1) << 8)); + int_sqrt((rqd->scale_step + 1) << 8)); } else { /* * For step < 0, we don't want to increase/decrease the @@ -433,12 +342,13 @@ static void rwb_arm_timer(struct rq_wb *rwb) static void wb_timer_fn(struct blk_stat_callback *cb) { struct rq_wb *rwb = cb->data; + struct rq_depth *rqd = &rwb->rq_depth; unsigned int inflight = wbt_inflight(rwb); int status; status = latency_exceeded(rwb, cb->stat); - trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step, + trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step, inflight); /* @@ -469,9 +379,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb) * currently don't have a valid read/write sample. For that * case, slowly return to center state (step == 0). */ - if (rwb->scale_step > 0) + if (rqd->scale_step > 0) scale_up(rwb); - else if (rwb->scale_step < 0) + else if (rqd->scale_step < 0) scale_down(rwb, false); break; default: @@ -481,19 +391,50 @@ static void wb_timer_fn(struct blk_stat_callback *cb) /* * Re-arm timer, if we have IO in flight */ - if (rwb->scale_step || inflight) + if (rqd->scale_step || inflight) rwb_arm_timer(rwb); } -void wbt_update_limits(struct rq_wb *rwb) +static void __wbt_update_limits(struct rq_wb *rwb) { - rwb->scale_step = 0; - rwb->scaled_max = false; + struct rq_depth *rqd = &rwb->rq_depth; + + rqd->scale_step = 0; + rqd->scaled_max = false; + + rq_depth_calc_max_depth(rqd); calc_wb_limits(rwb); rwb_wake_all(rwb); } +void wbt_update_limits(struct request_queue *q) +{ + struct rq_qos *rqos = wbt_rq_qos(q); + if (!rqos) + return; + __wbt_update_limits(RQWB(rqos)); +} + +u64 wbt_get_min_lat(struct request_queue *q) +{ + struct rq_qos *rqos = wbt_rq_qos(q); + if (!rqos) + return 0; + return RQWB(rqos)->min_lat_nsec; +} + +void wbt_set_min_lat(struct request_queue *q, u64 val) +{ + struct rq_qos *rqos = wbt_rq_qos(q); + if (!rqos) + return; + RQWB(rqos)->min_lat_nsec = val; + RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL; + __wbt_update_limits(RQWB(rqos)); +} + + static bool close_io(struct rq_wb *rwb) { const unsigned long now = jiffies; @@ -520,7 +461,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) * IO for a bit. */ if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd()) - limit = rwb->wb_max; + limit = rwb->rq_depth.max_depth; else if ((rw & REQ_BACKGROUND) || close_io(rwb)) { /* * If less than 100ms since we completed unrelated IO, @@ -554,7 +495,7 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw, rqw->wait.head.next != &wait->entry) return false; - return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw)); + return rq_wait_inc_below(rqw, get_limit(rwb, rw)); } /* @@ -614,8 +555,10 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) * in an irq held spinlock, if it holds one when calling this function. * If we do sleep, we'll release and re-grab it. 
*/ -enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) +static enum wbt_flags wbt_wait(struct rq_qos *rqos, struct bio *bio, + spinlock_t *lock) { + struct rq_wb *rwb = RQWB(rqos); enum wbt_flags ret = 0; if (!rwb_enabled(rwb)) @@ -643,8 +586,10 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) return ret | WBT_TRACKED; } -void wbt_issue(struct rq_wb *rwb, struct request *rq) +void wbt_issue(struct rq_qos *rqos, struct request *rq) { + struct rq_wb *rwb = RQWB(rqos); + if (!rwb_enabled(rwb)) return; @@ -661,8 +606,9 @@ void wbt_issue(struct rq_wb *rwb, struct request *rq) } } -void wbt_requeue(struct rq_wb *rwb, struct request *rq) +void wbt_requeue(struct rq_qos *rqos, struct request *rq) { + struct rq_wb *rwb = RQWB(rqos); if (!rwb_enabled(rwb)) return; if (rq == rwb->sync_cookie) { @@ -671,39 +617,30 @@ void wbt_requeue(struct rq_wb *rwb, struct request *rq) } } -void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth) +void wbt_set_queue_depth(struct request_queue *q, unsigned int depth) { - if (rwb) { - rwb->queue_depth = depth; - wbt_update_limits(rwb); + struct rq_qos *rqos = wbt_rq_qos(q); + if (rqos) { + RQWB(rqos)->rq_depth.queue_depth = depth; + __wbt_update_limits(RQWB(rqos)); } } -void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on) -{ - if (rwb) - rwb->wc = write_cache_on; -} - -/* - * Disable wbt, if enabled by default. - */ -void wbt_disable_default(struct request_queue *q) +void wbt_set_write_cache(struct request_queue *q, bool write_cache_on) { - struct rq_wb *rwb = q->rq_wb; - - if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT) - wbt_exit(q); + struct rq_qos *rqos = wbt_rq_qos(q); + if (rqos) + RQWB(rqos)->wc = write_cache_on; } -EXPORT_SYMBOL_GPL(wbt_disable_default); /* * Enable wbt if defaults are configured that way */ void wbt_enable_default(struct request_queue *q) { + struct rq_qos *rqos = wbt_rq_qos(q); /* Throttling already enabled? */ - if (q->rq_wb) + if (rqos) return; /* Queue not registered? Maybe shutting down... */ @@ -741,6 +678,41 @@ static int wbt_data_dir(const struct request *rq) return -1; } +static void wbt_exit(struct rq_qos *rqos) +{ + struct rq_wb *rwb = RQWB(rqos); + struct request_queue *q = rqos->q; + + blk_stat_remove_callback(q, rwb->cb); + blk_stat_free_callback(rwb->cb); + kfree(rwb); +} + +/* + * Disable wbt, if enabled by default. 
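 * (Note, editorial and not part of the patch: after this refactoring,
 * disabling no longer tears the structure down the way the old
 * wbt_exit() call did; it zeroes wb_normal, which makes rwb_enabled()
 * return false, and the rq_qos hook stays registered until
 * rq_qos_exit() runs at queue teardown.)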
+ */ +void wbt_disable_default(struct request_queue *q) +{ + struct rq_qos *rqos = wbt_rq_qos(q); + struct rq_wb *rwb; + if (!rqos) + return; + rwb = RQWB(rqos); + if (rwb->enable_state == WBT_STATE_ON_DEFAULT) + rwb->wb_normal = 0; +} +EXPORT_SYMBOL_GPL(wbt_disable_default); + + +static struct rq_qos_ops wbt_rqos_ops = { + .throttle = wbt_wait, + .issue = wbt_issue, + .requeue = wbt_requeue, + .done = wbt_done, + .cleanup = __wbt_done, + .exit = wbt_exit, +}; + int wbt_init(struct request_queue *q) { struct rq_wb *rwb; @@ -756,39 +728,29 @@ int wbt_init(struct request_queue *q) return -ENOMEM; } - for (i = 0; i < WBT_NUM_RWQ; i++) { - atomic_set(&rwb->rq_wait[i].inflight, 0); - init_waitqueue_head(&rwb->rq_wait[i].wait); - } + for (i = 0; i < WBT_NUM_RWQ; i++) + rq_wait_init(&rwb->rq_wait[i]); + rwb->rqos.id = RQ_QOS_WBT; + rwb->rqos.ops = &wbt_rqos_ops; + rwb->rqos.q = q; rwb->last_comp = rwb->last_issue = jiffies; - rwb->queue = q; rwb->win_nsec = RWB_WINDOW_NSEC; rwb->enable_state = WBT_STATE_ON_DEFAULT; - wbt_update_limits(rwb); + rwb->wc = 1; + rwb->rq_depth.default_depth = RWB_DEF_DEPTH; + __wbt_update_limits(rwb); /* * Assign rwb and add the stats callback. */ - q->rq_wb = rwb; + rq_qos_add(q, &rwb->rqos); blk_stat_add_callback(q, rwb->cb); rwb->min_lat_nsec = wbt_default_latency_nsec(q); - wbt_set_queue_depth(rwb, blk_queue_depth(q)); - wbt_set_write_cache(rwb, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); + wbt_set_queue_depth(q, blk_queue_depth(q)); + wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); return 0; } - -void wbt_exit(struct request_queue *q) -{ - struct rq_wb *rwb = q->rq_wb; - - if (rwb) { - blk_stat_remove_callback(q, rwb->cb); - blk_stat_free_callback(rwb->cb); - q->rq_wb = NULL; - kfree(rwb); - } -} diff --git a/block/blk-wbt.h b/block/blk-wbt.h index 300df531d0a6..53b20a58c0a2 100644 --- a/block/blk-wbt.h +++ b/block/blk-wbt.h @@ -9,6 +9,7 @@ #include #include "blk-stat.h" +#include "blk-rq-qos.h" enum wbt_flags { WBT_TRACKED = 1, /* write, tracked for throttling */ @@ -35,20 +36,12 @@ enum { WBT_STATE_ON_MANUAL = 2, }; -struct rq_wait { - wait_queue_head_t wait; - atomic_t inflight; -}; - struct rq_wb { /* * Settings that govern how we throttle */ unsigned int wb_background; /* background writeback */ unsigned int wb_normal; /* normal writeback */ - unsigned int wb_max; /* max throughput writeback */ - int scale_step; - bool scaled_max; short enable_state; /* WBT_STATE_* */ @@ -67,15 +60,20 @@ struct rq_wb { void *sync_cookie; unsigned int wc; - unsigned int queue_depth; unsigned long last_issue; /* last non-throttled issue */ unsigned long last_comp; /* last non-throttled comp */ unsigned long min_lat_nsec; - struct request_queue *queue; + struct rq_qos rqos; struct rq_wait rq_wait[WBT_NUM_RWQ]; + struct rq_depth rq_depth; }; +static inline struct rq_wb *RQWB(struct rq_qos *rqos) +{ + return container_of(rqos, struct rq_wb, rqos); +} + static inline unsigned int wbt_inflight(struct rq_wb *rwb) { unsigned int i, ret = 0; @@ -86,6 +84,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb) return ret; } + #ifdef CONFIG_BLK_WBT static inline void wbt_track(struct request *rq, enum wbt_flags flags) @@ -93,19 +92,16 @@ static inline void wbt_track(struct request *rq, enum wbt_flags flags) rq->wbt_flags |= flags; } -void __wbt_done(struct rq_wb *, enum wbt_flags); -void wbt_done(struct rq_wb *, struct request *); -enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *); int wbt_init(struct request_queue *); -void wbt_exit(struct 
request_queue *); -void wbt_update_limits(struct rq_wb *); -void wbt_requeue(struct rq_wb *, struct request *); -void wbt_issue(struct rq_wb *, struct request *); +void wbt_update_limits(struct request_queue *); void wbt_disable_default(struct request_queue *); void wbt_enable_default(struct request_queue *); -void wbt_set_queue_depth(struct rq_wb *, unsigned int); -void wbt_set_write_cache(struct rq_wb *, bool); +u64 wbt_get_min_lat(struct request_queue *q); +void wbt_set_min_lat(struct request_queue *q, u64 val); + +void wbt_set_queue_depth(struct request_queue *, unsigned int); +void wbt_set_write_cache(struct request_queue *, bool); u64 wbt_default_latency_nsec(struct request_queue *); @@ -114,43 +110,30 @@ u64 wbt_default_latency_nsec(struct request_queue *); static inline void wbt_track(struct request *rq, enum wbt_flags flags) { } -static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags) -{ -} -static inline void wbt_done(struct rq_wb *rwb, struct request *rq) -{ -} -static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, - spinlock_t *lock) -{ - return 0; -} static inline int wbt_init(struct request_queue *q) { return -EINVAL; } -static inline void wbt_exit(struct request_queue *q) -{ -} -static inline void wbt_update_limits(struct rq_wb *rwb) +static inline void wbt_update_limits(struct request_queue *q) { } -static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq) +static inline void wbt_disable_default(struct request_queue *q) { } -static inline void wbt_issue(struct rq_wb *rwb, struct request *rq) +static inline void wbt_enable_default(struct request_queue *q) { } -static inline void wbt_disable_default(struct request_queue *q) +static inline void wbt_set_queue_depth(struct request_queue *q, unsigned int depth) { } -static inline void wbt_enable_default(struct request_queue *q) +static inline void wbt_set_write_cache(struct request_queue *q, bool wc) { } -static inline void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth) +static inline u64 wbt_get_min_lat(struct request_queue *q) { + return 0; } -static inline void wbt_set_write_cache(struct rq_wb *rwb, bool wc) +static inline void wbt_set_min_lat(struct request_queue *q, u64 val) { } static inline u64 wbt_default_latency_nsec(struct request_queue *q) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9d05646d5059..137759862f07 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -42,7 +42,7 @@ struct bsg_job; struct blkcg_gq; struct blk_flush_queue; struct pr_ops; -struct rq_wb; +struct rq_qos; struct blk_queue_stats; struct blk_stat_callback; @@ -443,7 +443,7 @@ struct request_queue { int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ struct blk_queue_stats *stats; - struct rq_wb *rq_wb; + struct rq_qos *rq_qos; /* * If blkcg is not used, @q->root_rl serves all requests. If blkcg -- cgit v1.2.3 From 05814a10370b3252fe2b0898b6adac3cdd531096 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Fri, 13 Jul 2018 17:07:26 +0300 Subject: block: remove blkdev_entry_to_request() macro Remove blkdev_entry_to_request() macro, which remained unused through the observable history, also note that it repeats list_entry_rq() macro verbatim. 
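[Note: for reference only, the macro that remains in blkdev.h and that the removed one duplicated; this is the existing definition, not new code:]

#define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)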
Signed-off-by: Vladimir Zapolskiy Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 137759862f07..1939ed95f936 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1436,8 +1436,6 @@ enum blk_default_limits { BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, }; -#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) - static inline unsigned long queue_segment_boundary(struct request_queue *q) { return q->limits.seg_boundary_mask; -- cgit v1.2.3 From 3f289dcb4b265416a57ca79cf4a324060bb09060 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Jul 2018 04:47:36 -0700 Subject: block: make bdev_ops->rw_page() take a REQ_OP instead of bool c11f0c0b5bb9 ("block/mm: make bdev_ops->rw_page() take a bool for read/write") replaced @op with boolean @is_write, which limited the amount of information going into ->rw_page() and more importantly page_endio(), which removed the need to expose block internals to mm. Unfortunately, we want to track discards separately and @is_write isn't enough information. This patch updates bdev_ops->rw_page() to take REQ_OP instead but leaves page_endio() to take bool @is_write. This allows the block part of operations to have enough information while not leaking it to mm. Signed-off-by: Tejun Heo Cc: Mike Christie Cc: Minchan Kim Cc: Dan Williams Signed-off-by: Jens Axboe --- drivers/block/brd.c | 14 +++++++------- drivers/block/zram/zram_drv.c | 16 ++++++++-------- drivers/nvdimm/btt.c | 12 ++++++------ drivers/nvdimm/pmem.c | 13 ++++++------- fs/block_dev.c | 6 ++++-- fs/mpage.c | 4 ++-- include/linux/blkdev.h | 2 +- 7 files changed, 34 insertions(+), 33 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index bb976598ee43..df8103dd40ac 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -254,20 +254,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. 
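 * (Note, editorial and not part of the patch: throughout this commit
 * the bool is_write parameter becomes an unsigned int op carrying the
 * full REQ_OP_* value; op_is_write(op) recovers the old boolean where
 * that is all a driver needs, while the extra information lets discards
 * be tracked separately from plain writes, per the commit message.)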
*/ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, bool is_write, + unsigned int len, unsigned int off, unsigned int op, sector_t sector) { void *mem; int err = 0; - if (is_write) { + if (op_is_write(op)) { err = copy_to_brd_setup(brd, sector, len); if (err) goto out; } mem = kmap_atomic(page); - if (!is_write) { + if (!op_is_write(op)) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); } else { @@ -296,7 +296,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) int err; err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), sector); + bio_op(bio), sector); if (err) goto io_error; sector += len >> SECTOR_SHIFT; @@ -310,15 +310,15 @@ io_error: } static int brd_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { struct brd_device *brd = bdev->bd_disk->private_data; int err; if (PageTransHuge(page)) return -ENOTSUPP; - err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector); - page_endio(page, is_write, err); + err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector); + page_endio(page, op_is_write(op), err); return err; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7436b2d27fa3..78c29044684a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1274,17 +1274,17 @@ static void zram_bio_discard(struct zram *zram, u32 index, * Returns 1 if IO request was successfully submitted. */ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, bool is_write, struct bio *bio) + int offset, unsigned int op, struct bio *bio) { unsigned long start_time = jiffies; - int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ; + int rw_acct = op_is_write(op) ? 
REQ_OP_WRITE : REQ_OP_READ; struct request_queue *q = zram->disk->queue; int ret; generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT, &zram->disk->part0); - if (!is_write) { + if (!op_is_write(op)) { atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset, bio); flush_dcache_page(bvec->bv_page); @@ -1300,7 +1300,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, zram_slot_unlock(zram, index); if (unlikely(ret < 0)) { - if (!is_write) + if (!op_is_write(op)) atomic64_inc(&zram->stats.failed_reads); else atomic64_inc(&zram->stats.failed_writes); @@ -1338,7 +1338,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, unwritten); if (zram_bvec_rw(zram, &bv, index, offset, - op_is_write(bio_op(bio)), bio) < 0) + bio_op(bio), bio) < 0) goto out; bv.bv_offset += bv.bv_len; @@ -1390,7 +1390,7 @@ static void zram_slot_free_notify(struct block_device *bdev, } static int zram_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { int offset, ret; u32 index; @@ -1414,7 +1414,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; - ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL); + ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); out: /* * If I/O fails, just return error(ie, non-zero) without @@ -1429,7 +1429,7 @@ out: switch (ret) { case 0: - page_endio(page, is_write, 0); + page_endio(page, op_is_write(op), 0); break; case 1: ret = 0; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 85de8053aa34..0360c015f658 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1423,11 +1423,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, struct page *page, unsigned int len, unsigned int off, - bool is_write, sector_t sector) + unsigned int op, sector_t sector) { int ret; - if (!is_write) { + if (!op_is_write(op)) { ret = btt_read_pg(btt, bip, page, off, sector, len); flush_dcache_page(page); } else { @@ -1464,7 +1464,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) } err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), iter.bi_sector); + bio_op(bio), iter.bi_sector); if (err) { dev_err(&btt->nd_btt->dev, "io error in %s sector %lld, len %d,\n", @@ -1483,16 +1483,16 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) } static int btt_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { struct btt *btt = bdev->bd_disk->private_data; int rc; unsigned int len; len = hpage_nr_pages(page) * PAGE_SIZE; - rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector); + rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector); if (rc == 0) - page_endio(page, is_write, 0); + page_endio(page, op_is_write(op), 0); return rc; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 8b1fd7f1a224..dd17acd8fe68 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -120,7 +120,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, } static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, - unsigned int len, unsigned int off, bool is_write, + unsigned int len, unsigned int off, unsigned int 
op, sector_t sector) { blk_status_t rc = BLK_STS_OK; @@ -131,7 +131,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; - if (!is_write) { + if (!op_is_write(op)) { if (unlikely(bad_pmem)) rc = BLK_STS_IOERR; else { @@ -180,8 +180,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, - bvec.bv_offset, op_is_write(bio_op(bio)), - iter.bi_sector); + bvec.bv_offset, bio_op(bio), iter.bi_sector); if (rc) { bio->bi_status = rc; break; @@ -198,13 +197,13 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) } static int pmem_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { struct pmem_device *pmem = bdev->bd_queue->queuedata; blk_status_t rc; rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, - 0, is_write, sector); + 0, op, sector); /* * The ->rw_page interface is subtle and tricky. The core @@ -213,7 +212,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, * caused by double completion. */ if (rc == 0) - page_endio(page, is_write, 0); + page_endio(page, op_is_write(op), 0); return blk_status_to_errno(rc); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 0dd87aaeb39a..496fb51a1e1a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -665,7 +665,8 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, result = blk_queue_enter(bdev->bd_queue, 0); if (result) return result; - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_READ); blk_queue_exit(bdev->bd_queue); return result; } @@ -703,7 +704,8 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, return result; set_page_writeback(page); - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_WRITE); if (result) { end_page_writeback(page); } else { diff --git a/fs/mpage.c b/fs/mpage.c index b7e7f570733a..b73638db9866 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -51,8 +51,8 @@ static void mpage_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - page_endio(page, op_is_write(bio_op(bio)), - blk_status_to_errno(bio->bi_status)); + page_endio(page, bio_op(bio), + blk_status_to_errno(bio->bi_status)); } bio_put(bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1939ed95f936..331a6cb8805f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1943,7 +1943,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req, struct block_device_operations { int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); - int (*rw_page)(struct block_device *, sector_t, struct page *, bool); + int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, -- cgit v1.2.3 From 359f642700f2ff05d9c94cd9216c97af7b8e9553 Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Wed, 25 Jul 2018 10:22:58 -0400 Subject: block: move 
bio_integrity_{intervals,bytes} into blkdev.h This allows bio_integrity_bytes() to be called from drivers instead of open coding it. Acked-by: Martin K. Petersen Signed-off-by: Greg Edwards Signed-off-by: Jens Axboe --- block/bio-integrity.c | 22 ---------------------- include/linux/blkdev.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 22 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index add7c7c85335..67b5fb861a51 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -159,28 +159,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL(bio_integrity_add_page); -/** - * bio_integrity_intervals - Return number of integrity intervals for a bio - * @bi: blk_integrity profile for device - * @sectors: Size of the bio in 512-byte sectors - * - * Description: The block layer calculates everything in 512 byte - * sectors but integrity metadata is done in terms of the data integrity - * interval size of the storage device. Convert the block layer sectors - * to the appropriate number of integrity intervals. - */ -static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, - unsigned int sectors) -{ - return sectors >> (bi->interval_exp - 9); -} - -static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, - unsigned int sectors) -{ - return bio_integrity_intervals(bi, sectors) * bi->tuple_size; -} - /** * bio_integrity_process - Process integrity metadata for a bio * @bio: bio to generate/verify integrity metadata for diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 331a6cb8805f..050d599f5ea9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1865,6 +1865,28 @@ static inline bool integrity_req_gap_front_merge(struct request *req, bip_next->bip_vec[0].bv_offset); } +/** + * bio_integrity_intervals - Return number of integrity intervals for a bio + * @bi: blk_integrity profile for device + * @sectors: Size of the bio in 512-byte sectors + * + * Description: The block layer calculates everything in 512 byte + * sectors but integrity metadata is done in terms of the data integrity + * interval size of the storage device. Convert the block layer sectors + * to the appropriate number of integrity intervals. + */ +static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, + unsigned int sectors) +{ + return sectors >> (bi->interval_exp - 9); +} + +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return bio_integrity_intervals(bi, sectors) * bi->tuple_size; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ struct bio; @@ -1938,6 +1960,18 @@ static inline bool integrity_req_gap_front_merge(struct request *req, return false; } +static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, + unsigned int sectors) +{ + return 0; +} + +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return 0; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ struct block_device_operations { -- cgit v1.2.3 From b1f4267cc5448d20ae0c515a74141e74365e78a3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Aug 2018 07:47:28 -0700 Subject: block: Remove two superfluous #include directives Commit 12f5b9314545 ("blk-mq: Remove generation seqeunce") removed the only seqcount_t and u64_stats_sync instances from but did not remove the corresponding #include directives. 
Since these include directives are no longer needed, remove them. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Keith Busch Cc: Ming Lei Cc: Jianchao Wang Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 050d599f5ea9..d6869e0e2b64 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -27,8 +27,6 @@ #include #include #include -#include <linux/seqlock.h> -#include <linux/u64_stats_sync.h> struct module; struct scsi_ioctl_command; -- cgit v1.2.3