-rw-r--r--  drivers/block/ll_rw_blk.c    | 160
-rw-r--r--  include/linux/backing-dev.h  |  14
-rw-r--r--  include/linux/blkdev.h       |   1
3 files changed, 150 insertions, 25 deletions
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 48771f492d49..efee1ba800d5 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -46,13 +46,76 @@ static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
 struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
 
 /*
- * How many reqeusts do we allocate per queue,
- * and how many do we "batch" on freeing them?
+ * Number of requests per queue.  This many for reads and for writes (twice
+ * this number, total).
  */
-int queue_nr_requests, batch_requests;
+static int queue_nr_requests;
+
+/*
+ * How many free requests must be available before we wake a process which
+ * is waiting for a request?
+ */
+static int batch_requests;
+
 unsigned long blk_max_low_pfn, blk_max_pfn;
 int blk_nohighio = 0;
 
+static struct congestion_state {
+        wait_queue_head_t wqh;
+        atomic_t nr_congested_queues;
+} congestion_states[2];
+
+/*
+ * Return the threshold (number of free requests) at which the queue is
+ * considered to be congested.  It includes a little hysteresis to keep the
+ * context switch rate down.
+ */
+static inline int queue_congestion_on_threshold(void)
+{
+        int ret;
+
+        ret = queue_nr_requests / 4 - 1;
+        if (ret < 0)
+                ret = 1;
+        return ret;
+}
+
+/*
+ * The threshold at which a queue is considered to be uncongested
+ */
+static inline int queue_congestion_off_threshold(void)
+{
+        int ret;
+
+        ret = queue_nr_requests / 4 + 1;
+        if (ret > queue_nr_requests)
+                ret = queue_nr_requests;
+        return ret;
+}
+
+static void clear_queue_congested(request_queue_t *q, int rw)
+{
+        enum bdi_state bit;
+        struct congestion_state *cs = &congestion_states[rw];
+
+        bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
+
+        if (test_and_clear_bit(bit, &q->backing_dev_info.state))
+                atomic_dec(&cs->nr_congested_queues);
+        if (waitqueue_active(&cs->wqh))
+                wake_up(&cs->wqh);
+}
+
+static void set_queue_congested(request_queue_t *q, int rw)
+{
+        enum bdi_state bit;
+
+        bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
+
+        if (!test_and_set_bit(bit, &q->backing_dev_info.state))
+                atomic_inc(&congestion_states[rw].nr_congested_queues);
+}
+
 /**
  * bdev_get_queue: - return the queue that matches the given device
  * @bdev: device
@@ -360,8 +423,8 @@ int blk_queue_init_tags(request_queue_t *q, int depth)
         struct blk_queue_tag *tags;
         int bits, i;
 
-        if (depth > queue_nr_requests) {
-                depth = queue_nr_requests;
+        if (depth > (queue_nr_requests*2)) {
+                depth = (queue_nr_requests*2);
                 printk("blk_queue_init_tags: adjusted depth to %d\n", depth);
         }
 
@@ -1019,7 +1082,7 @@
  **/
 void blk_cleanup_queue(request_queue_t * q)
 {
-        int count = queue_nr_requests;
+        int count = (queue_nr_requests*2);
 
         count -= __blk_cleanup_queue(&q->rq[READ]);
         count -= __blk_cleanup_queue(&q->rq[WRITE]);
@@ -1050,7 +1113,7 @@ static int blk_init_free_list(request_queue_t *q)
          * Divide requests in half between read and write
          */
         rl = &q->rq[READ];
-        for (i = 0; i < queue_nr_requests; i++) {
+        for (i = 0; i < (queue_nr_requests*2); i++) {
                 rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL);
                 if (!rq)
                         goto nomem;
@@ -1058,7 +1121,7 @@ static int blk_init_free_list(request_queue_t *q)
                 /*
                  * half way through, switch to WRITE list
                  */
-                if (i == queue_nr_requests / 2)
+                if (i == queue_nr_requests)
                         rl = &q->rq[WRITE];
 
                 memset(rq, 0, sizeof(struct request));
@@ -1144,7 +1207,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
  * Get a free request. queue lock must be held and interrupts
  * disabled on the way in.
  */
-static inline struct request *get_request(request_queue_t *q, int rw)
+static struct request *get_request(request_queue_t *q, int rw)
 {
         struct request *rq = NULL;
         struct request_list *rl = q->rq + rw;
@@ -1153,6 +1216,8 @@ static inline struct request *get_request(request_queue_t *q, int rw)
                 rq = blkdev_free_rq(&rl->free);
                 list_del(&rq->queuelist);
                 rl->count--;
+                if (rl->count < queue_congestion_on_threshold())
+                        set_queue_congested(q, rw);
                 rq->flags = 0;
                 rq->rq_status = RQ_ACTIVE;
                 rq->special = NULL;
@@ -1365,13 +1430,50 @@ void blk_put_request(struct request *req)
          * it didn't come out of our reserved rq pools
          */
         if (rl) {
+                int rw = 0;
+
                 list_add(&req->queuelist, &rl->free);
-                if (++rl->count >= batch_requests &&waitqueue_active(&rl->wait))
+                if (rl == &q->rq[WRITE])
+                        rw = WRITE;
+                else if (rl == &q->rq[READ])
+                        rw = READ;
+                else
+                        BUG();
+
+                rl->count++;
+                if (rl->count >= queue_congestion_off_threshold())
+                        clear_queue_congested(q, rw);
+                if (rl->count >= batch_requests && waitqueue_active(&rl->wait))
                         wake_up(&rl->wait);
         }
 }
 
+/**
+ * blk_congestion_wait - wait for a queue to become uncongested
+ * @rw: READ or WRITE
+ * @timeout: timeout in jiffies
+ *
+ * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.
+ * If no queues are congested then just return, in the hope that the caller
+ * will submit some more IO.
+ */
+void blk_congestion_wait(int rw, long timeout)
+{
+        DECLARE_WAITQUEUE(wait, current);
+        struct congestion_state *cs = &congestion_states[rw];
+
+        if (atomic_read(&cs->nr_congested_queues) == 0)
+                return;
+        blk_run_queues();
+        set_current_state(TASK_UNINTERRUPTIBLE);
+        add_wait_queue(&cs->wqh, &wait);
+        if (atomic_read(&cs->nr_congested_queues) != 0)
+                schedule_timeout(timeout);
+        set_current_state(TASK_RUNNING);
+        remove_wait_queue(&cs->wqh, &wait);
+}
+
 /*
  * Has to be called with the request spinlock acquired
  */
@@ -1868,6 +1970,7 @@ void end_that_request_last(struct request *req)
 int __init blk_dev_init(void)
 {
         int total_ram = nr_free_pages() << (PAGE_SHIFT - 10);
+        int i;
 
         request_cachep = kmem_cache_create("blkdev_requests",
                                            sizeof(struct request), 0,
@@ -1876,26 +1979,33 @@ int __init blk_dev_init(void)
                 panic("Can't create request pool slab cache\n");
 
         /*
-         * Free request slots per queue.
-         * (Half for reads, half for writes)
-         */
-        queue_nr_requests = (total_ram >> 8) & ~15;     /* One per quarter-megabyte */
-        if (queue_nr_requests < 32)
-                queue_nr_requests = 32;
-        if (queue_nr_requests > 256)
-                queue_nr_requests = 256;
-
-        /*
-         * Batch frees according to queue length
+         * Free request slots per queue.  One per quarter-megabyte.
+         * We use this many requests for reads, and this many for writes.
          */
-        if ((batch_requests = queue_nr_requests / 4) > 32)
-                batch_requests = 32;
-        printk("block: %d slots per queue, batch=%d\n",
-               queue_nr_requests, batch_requests);
+        queue_nr_requests = (total_ram >> 9) & ~7;
+        if (queue_nr_requests < 16)
+                queue_nr_requests = 16;
+        if (queue_nr_requests > 128)
+                queue_nr_requests = 128;
+
+        batch_requests = queue_nr_requests / 8;
+        if (batch_requests > 8)
+                batch_requests = 8;
+
+        printk("block request queues:\n");
+        printk(" %d requests per read queue\n", queue_nr_requests);
+        printk(" %d requests per write queue\n", queue_nr_requests);
+        printk(" %d requests per batch\n", batch_requests);
+        printk(" enter congestion at %d\n", queue_congestion_on_threshold());
+        printk(" exit congestion at %d\n", queue_congestion_off_threshold());
 
         blk_max_low_pfn = max_low_pfn;
         blk_max_pfn = max_pfn;
+
+        for (i = 0; i < ARRAY_SIZE(congestion_states); i++) {
+                init_waitqueue_head(&congestion_states[i].wqh);
+                atomic_set(&congestion_states[i].nr_congested_queues, 0);
+        }
         return 0;
 };
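The two threshold helpers above are where the hysteresis promised in the comment lives: a queue is marked congested when its free-request count drops below queue_nr_requests/4 - 1, but is not marked uncongested again until the count climbs back to queue_nr_requests/4 + 1, so a queue hovering at the boundary does not flap between states on every request. Below is a minimal userspace sketch, not kernel code: the two helpers are copied from the patch, while main() and the printed table are illustrative additions.

#include <stdio.h>

/* Copied from the patch; in the kernel this is sized in blk_dev_init(). */
static int queue_nr_requests;

static int queue_congestion_on_threshold(void)
{
        int ret;

        ret = queue_nr_requests / 4 - 1;
        if (ret < 0)
                ret = 1;
        return ret;
}

static int queue_congestion_off_threshold(void)
{
        int ret;

        ret = queue_nr_requests / 4 + 1;
        if (ret > queue_nr_requests)
                ret = queue_nr_requests;
        return ret;
}

int main(void)
{
        /* The patch clamps queue_nr_requests to the range [16, 128]. */
        static const int sizes[] = { 16, 32, 64, 128 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                queue_nr_requests = sizes[i];
                printf("%3d requests: congested below %2d free, uncongested at %2d free\n",
                       queue_nr_requests,
                       queue_congestion_on_threshold(),
                       queue_congestion_off_threshold());
        }
        return 0;
}

For the largest queue size this prints "congested below 31, uncongested at 33": the queue must gain two requests beyond the trigger point before waiters are released again, which is what keeps the context switch rate down.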
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 898f8e1814ef..94c93c9c5f66 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -8,11 +8,15 @@
 #ifndef _LINUX_BACKING_DEV_H
 #define _LINUX_BACKING_DEV_H
 
+#include <asm/atomic.h>
+
 /*
  * Bits in backing_dev_info.state
  */
 enum bdi_state {
         BDI_pdflush,            /* A pdflush thread is working this device */
+        BDI_write_congested,    /* The write queue is getting full */
+        BDI_read_congested,     /* The read queue is getting full */
         BDI_unused,             /* Available bits start here */
 };
 
@@ -28,4 +32,14 @@ int writeback_acquire(struct backing_dev_info *bdi);
 int writeback_in_progress(struct backing_dev_info *bdi);
 void writeback_release(struct backing_dev_info *bdi);
 
+static inline int bdi_read_congested(struct backing_dev_info *bdi)
+{
+        return test_bit(BDI_read_congested, &bdi->state);
+}
+
+static inline int bdi_write_congested(struct backing_dev_info *bdi)
+{
+        return test_bit(BDI_write_congested, &bdi->state);
+}
+
 #endif /* _LINUX_BACKING_DEV_H */
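These two inline predicates are the interface the rest of the kernel is expected to use: a writeback path can ask whether a device's read or write queue is backed up before submitting I/O, rather than discovering it by blocking in get_request(). A self-contained userspace model of their behaviour follows; it is a sketch only, with the kernel's atomic test_bit() replaced by plain bit arithmetic and struct backing_dev_info reduced to its state word.

#include <stdio.h>

enum bdi_state {
        BDI_pdflush,
        BDI_write_congested,
        BDI_read_congested,
        BDI_unused,
};

struct backing_dev_info {
        unsigned long state;
};

static int bdi_read_congested(struct backing_dev_info *bdi)
{
        return (bdi->state >> BDI_read_congested) & 1;
}

static int bdi_write_congested(struct backing_dev_info *bdi)
{
        return (bdi->state >> BDI_write_congested) & 1;
}

int main(void)
{
        struct backing_dev_info bdi = { 0 };

        /* Models what set_queue_congested(q, WRITE) does with test_and_set_bit(). */
        bdi.state |= 1UL << BDI_write_congested;

        printf("write congested: %d\n", bdi_write_congested(&bdi));  /* prints 1 */
        printf("read congested:  %d\n", bdi_read_congested(&bdi));   /* prints 0 */
        return 0;
}

In the kernel the bits are flipped only by set_queue_congested() and clear_queue_congested() in ll_rw_blk.c, so the predicates are cheap, lock-free hints about queue fullness rather than guarantees.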
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fa0798452e77..255001f6f433 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -345,6 +345,7 @@ extern void blk_queue_end_tag(request_queue_t *, struct request *);
 extern int blk_queue_init_tags(request_queue_t *, int);
 extern void blk_queue_free_tags(request_queue_t *);
 extern void blk_queue_invalidate_tags(request_queue_t *);
+extern void blk_congestion_wait(int rw, long timeout);
 
 #define MAX_PHYS_SEGMENTS 128
 #define MAX_HW_SEGMENTS 128
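The new declaration gives code outside the block layer a way to throttle itself against queue fullness. The fragment below is a hypothetical caller, not part of this patch and not standalone-compilable: too_much_dirty_memory() is an invented predicate and HZ/20 an illustrative timeout, but the shape matches the kerneldoc above, where blk_congestion_wait() returns immediately when nothing is congested and otherwise kicks the queues and sleeps until one exits congestion or the timeout expires.

/*
 * Hypothetical caller, for illustration only.  A process which has
 * dirtied too much memory backs off until some write queue leaves
 * congestion (or ~50ms passes) instead of busy-retrying.
 */
static void throttle_writer(void)
{
        while (too_much_dirty_memory())         /* invented predicate */
                blk_congestion_wait(WRITE, HZ / 20);
}

Note that the wait is global per direction: it is satisfied by any queue leaving write congestion, not by a particular device, which is why congestion_states[] is a two-element array indexed by READ/WRITE rather than per-queue state.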
