-rw-r--r--	drivers/block/ll_rw_blk.c	160
-rw-r--r--	include/linux/backing-dev.h	14
-rw-r--r--	include/linux/blkdev.h	1
3 files changed, 150 insertions, 25 deletions
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 48771f492d49..efee1ba800d5 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -46,13 +46,76 @@ static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
/*
- * How many reqeusts do we allocate per queue,
- * and how many do we "batch" on freeing them?
+ * Number of requests per queue.  This many for reads and this many for
+ * writes (so twice this number in total).
*/
-int queue_nr_requests, batch_requests;
+static int queue_nr_requests;
+
+/*
+ * How many free requests must be available before we wake a process which
+ * is waiting for a request?
+ */
+static int batch_requests;
+
unsigned long blk_max_low_pfn, blk_max_pfn;
int blk_nohighio = 0;
+static struct congestion_state {
+ wait_queue_head_t wqh;
+ atomic_t nr_congested_queues;
+} congestion_states[2];
+
+/*
+ * Return the threshold (number of free requests) at which the queue is
+ * considered to be congested.  It includes a little hysteresis to keep the
+ * context switch rate down.
+ */
+static inline int queue_congestion_on_threshold(void)
+{
+ int ret;
+
+ ret = queue_nr_requests / 4 - 1;
+ if (ret < 0)
+ ret = 1;
+ return ret;
+}
+
+/*
+ * The threshold at which a queue is considered to be uncongested
+ */
+static inline int queue_congestion_off_threshold(void)
+{
+ int ret;
+
+ ret = queue_nr_requests / 4 + 1;
+ if (ret > queue_nr_requests)
+ ret = queue_nr_requests;
+ return ret;
+}
+
+static void clear_queue_congested(request_queue_t *q, int rw)
+{
+ enum bdi_state bit;
+ struct congestion_state *cs = &congestion_states[rw];
+
+ bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
+
+ if (test_and_clear_bit(bit, &q->backing_dev_info.state))
+ atomic_dec(&cs->nr_congested_queues);
+ if (waitqueue_active(&cs->wqh))
+ wake_up(&cs->wqh);
+}
+
+static void set_queue_congested(request_queue_t *q, int rw)
+{
+ enum bdi_state bit;
+
+ bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
+
+ if (!test_and_set_bit(bit, &q->backing_dev_info.state))
+ atomic_inc(&congestion_states[rw].nr_congested_queues);
+}
+
/**
* bdev_get_queue: - return the queue that matches the given device
* @bdev: device
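[Review note: to make the hysteresis concrete, suppose queue_nr_requests sits at its
upper clamp of 128 (set in blk_dev_init() below).  A queue then goes congested when
its free-request count drops below 128/4 - 1 = 31, and only leaves congestion once
the count recovers to 128/4 + 1 = 33.  The two-request gap stops a queue hovering at
the boundary from flipping state, and waking sleepers, on every allocate/free pair.
A minimal user-space sketch of the same on/off logic, assuming that clamped value of
128 (the sketch is illustrative only, not part of this patch):

/* Illustrative sketch -- mirrors queue_congestion_{on,off}_threshold() */
#include <stdio.h>

static int nr_requests = 128;	/* assumed: the blk_dev_init() upper clamp */

static int on_threshold(void)
{
	int ret = nr_requests / 4 - 1;
	return ret < 0 ? 1 : ret;
}

static int off_threshold(void)
{
	int ret = nr_requests / 4 + 1;
	return ret > nr_requests ? nr_requests : ret;
}

int main(void)
{
	int congested = 0, count;

	/* free requests drain away... */
	for (count = 34; count >= 30; count--) {
		if (count < on_threshold())
			congested = 1;
		printf("free=%2d congested=%d\n", count, congested);
	}
	/* ...then come back; congestion clears only at 33, not 31 */
	for (count = 31; count <= 34; count++) {
		if (count >= off_threshold())
			congested = 0;
		printf("free=%2d congested=%d\n", count, congested);
	}
	return 0;
}
]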
@@ -360,8 +423,8 @@ int blk_queue_init_tags(request_queue_t *q, int depth)
struct blk_queue_tag *tags;
int bits, i;
- if (depth > queue_nr_requests) {
- depth = queue_nr_requests;
+ if (depth > (queue_nr_requests*2)) {
+ depth = (queue_nr_requests*2);
printk("blk_queue_init_tags: adjusted depth to %d\n", depth);
}
@@ -1019,7 +1082,7 @@ static int __blk_cleanup_queue(struct request_list *list)
**/
void blk_cleanup_queue(request_queue_t * q)
{
- int count = queue_nr_requests;
+ int count = (queue_nr_requests*2);
count -= __blk_cleanup_queue(&q->rq[READ]);
count -= __blk_cleanup_queue(&q->rq[WRITE]);
@@ -1050,7 +1113,7 @@ static int blk_init_free_list(request_queue_t *q)
* Divide requests in half between read and write
*/
rl = &q->rq[READ];
- for (i = 0; i < queue_nr_requests; i++) {
+ for (i = 0; i < (queue_nr_requests*2); i++) {
rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL);
if (!rq)
goto nomem;
@@ -1058,7 +1121,7 @@ static int blk_init_free_list(request_queue_t *q)
/*
* half way through, switch to WRITE list
*/
- if (i == queue_nr_requests / 2)
+ if (i == queue_nr_requests)
rl = &q->rq[WRITE];
memset(rq, 0, sizeof(struct request));
@@ -1144,7 +1207,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
* Get a free request. queue lock must be held and interrupts
* disabled on the way in.
*/
-static inline struct request *get_request(request_queue_t *q, int rw)
+static struct request *get_request(request_queue_t *q, int rw)
{
struct request *rq = NULL;
struct request_list *rl = q->rq + rw;
@@ -1153,6 +1216,8 @@ static inline struct request *get_request(request_queue_t *q, int rw)
rq = blkdev_free_rq(&rl->free);
list_del(&rq->queuelist);
rl->count--;
+ if (rl->count < queue_congestion_on_threshold())
+ set_queue_congested(q, rw);
rq->flags = 0;
rq->rq_status = RQ_ACTIVE;
rq->special = NULL;
@@ -1365,13 +1430,50 @@ void blk_put_request(struct request *req)
* it didn't come out of our reserved rq pools
*/
if (rl) {
+ int rw = 0;
+
list_add(&req->queuelist, &rl->free);
- if (++rl->count >= batch_requests &&waitqueue_active(&rl->wait))
+ if (rl == &q->rq[WRITE])
+ rw = WRITE;
+ else if (rl == &q->rq[READ])
+ rw = READ;
+ else
+ BUG();
+
+ rl->count++;
+ if (rl->count >= queue_congestion_off_threshold())
+ clear_queue_congested(q, rw);
+ if (rl->count >= batch_requests && waitqueue_active(&rl->wait))
wake_up(&rl->wait);
}
}
+/**
+ * blk_congestion_wait - wait for a queue to become uncongested
+ * @rw: READ or WRITE
+ * @timeout: timeout in jiffies
+ *
+ * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.
+ * If no queues are congested then just return, in the hope that the caller
+ * will submit some more IO.
+ */
+void blk_congestion_wait(int rw, long timeout)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ struct congestion_state *cs = &congestion_states[rw];
+
+ if (atomic_read(&cs->nr_congested_queues) == 0)
+ return;
+ blk_run_queues();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ add_wait_queue(&cs->wqh, &wait);
+ if (atomic_read(&cs->nr_congested_queues) != 0)
+ schedule_timeout(timeout);
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&cs->wqh, &wait);
+}
+
/*
* Has to be called with the request spinlock acquired
*/
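[Review note: a hedged sketch of the intended caller.  A writeback path that cannot
make progress because every queue is full can back off with the new primitive rather
than busy-retrying.  The function name and the HZ/20 timeout below are illustrative
assumptions, not part of this patch:

/* Hypothetical caller -- name and timeout are illustrative only */
static void writeback_backoff_example(void)
{
	/*
	 * blk_congestion_wait() returns immediately if nothing is
	 * congested; otherwise it kicks the queues and sleeps until
	 * some write queue exits congestion or ~50ms pass.
	 */
	blk_congestion_wait(WRITE, HZ / 20);
}
]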
@@ -1868,6 +1970,7 @@ void end_that_request_last(struct request *req)
int __init blk_dev_init(void)
{
int total_ram = nr_free_pages() << (PAGE_SHIFT - 10);
+ int i;
request_cachep = kmem_cache_create("blkdev_requests",
sizeof(struct request), 0,
@@ -1876,26 +1979,33 @@ int __init blk_dev_init(void)
panic("Can't create request pool slab cache\n");
/*
- * Free request slots per queue.
- * (Half for reads, half for writes)
- */
- queue_nr_requests = (total_ram >> 8) & ~15; /* One per quarter-megabyte */
- if (queue_nr_requests < 32)
- queue_nr_requests = 32;
- if (queue_nr_requests > 256)
- queue_nr_requests = 256;
-
- /*
- * Batch frees according to queue length
+	 * Free request slots per queue: one per quarter-megabyte of memory
+	 * in total, split evenly -- this many requests for reads and the
+	 * same number again for writes.
*/
- if ((batch_requests = queue_nr_requests / 4) > 32)
- batch_requests = 32;
- printk("block: %d slots per queue, batch=%d\n",
- queue_nr_requests, batch_requests);
+ queue_nr_requests = (total_ram >> 9) & ~7;
+ if (queue_nr_requests < 16)
+ queue_nr_requests = 16;
+ if (queue_nr_requests > 128)
+ queue_nr_requests = 128;
+
+ batch_requests = queue_nr_requests / 8;
+ if (batch_requests > 8)
+ batch_requests = 8;
+
+ printk("block request queues:\n");
+ printk(" %d requests per read queue\n", queue_nr_requests);
+ printk(" %d requests per write queue\n", queue_nr_requests);
+ printk(" %d requests per batch\n", batch_requests);
+ printk(" enter congestion at %d\n", queue_congestion_on_threshold());
+ printk(" exit congestion at %d\n", queue_congestion_off_threshold());
blk_max_low_pfn = max_low_pfn;
blk_max_pfn = max_pfn;
+ for (i = 0; i < ARRAY_SIZE(congestion_states); i++) {
+ init_waitqueue_head(&congestion_states[i].wqh);
+ atomic_set(&congestion_states[i].nr_congested_queues, 0);
+ }
return 0;
};
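[Review note: worked example of the new sizing, assuming total_ram is free memory in
kilobytes (nr_free_pages() << (PAGE_SHIFT - 10)).  A 16MB machine gets
(16384 >> 9) & ~7 = 32 requests per direction with a batch of 4; a 64MB machine
computes 128, exactly the upper clamp, with the batch clamped to 8.  A standalone
sketch of the arithmetic:

/* Illustrative sketch -- reproduces the blk_dev_init() sizing */
#include <stdio.h>

int main(void)
{
	int total_ram;	/* free memory in KB */

	for (total_ram = 16 << 10; total_ram <= 256 << 10; total_ram <<= 1) {
		int nr = (total_ram >> 9) & ~7;	/* one per half-MB per direction */
		int batch;

		if (nr < 16)
			nr = 16;
		if (nr > 128)
			nr = 128;
		batch = nr / 8;
		if (batch > 8)
			batch = 8;
		printf("%3dMB free: %3d requests each way, batch=%d\n",
		       total_ram >> 10, nr, batch);
	}
	return 0;
}
]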
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 898f8e1814ef..94c93c9c5f66 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -8,11 +8,15 @@
#ifndef _LINUX_BACKING_DEV_H
#define _LINUX_BACKING_DEV_H
+#include <asm/atomic.h>
+
/*
* Bits in backing_dev_info.state
*/
enum bdi_state {
BDI_pdflush, /* A pdflush thread is working this device */
+ BDI_write_congested, /* The write queue is getting full */
+ BDI_read_congested, /* The read queue is getting full */
BDI_unused, /* Available bits start here */
};
@@ -28,4 +32,14 @@ int writeback_acquire(struct backing_dev_info *bdi);
int writeback_in_progress(struct backing_dev_info *bdi);
void writeback_release(struct backing_dev_info *bdi);
+static inline int bdi_read_congested(struct backing_dev_info *bdi)
+{
+ return test_bit(BDI_read_congested, &bdi->state);
+}
+
+static inline int bdi_write_congested(struct backing_dev_info *bdi)
+{
+ return test_bit(BDI_write_congested, &bdi->state);
+}
+
#endif /* _LINUX_BACKING_DEV_H */
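[Review note: these helpers let callers outside the block layer (readahead,
pdflush-style writeback) poll a device's congestion state without touching the
request queue directly.  A hedged sketch of the intended pattern; the function name
and the skip policy are illustrative assumptions, not part of this patch:

/* Hypothetical caller -- illustrative only */
static int should_defer_background_io(struct backing_dev_info *bdi, int rw)
{
	/* Skip or postpone optional I/O against an already-full queue. */
	if (rw == WRITE)
		return bdi_write_congested(bdi);
	return bdi_read_congested(bdi);
}
]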
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fa0798452e77..255001f6f433 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -345,6 +345,7 @@ extern void blk_queue_end_tag(request_queue_t *, struct request *);
extern int blk_queue_init_tags(request_queue_t *, int);
extern void blk_queue_free_tags(request_queue_t *);
extern void blk_queue_invalidate_tags(request_queue_t *);
+extern void blk_congestion_wait(int rw, long timeout);
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128