| author | Andrew Morton <akpm@osdl.org> | 2003-07-04 19:37:26 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@home.osdl.org> | 2003-07-04 19:37:26 -0700 |
| commit | 930805a244eaadb5aefbc08b558db72136128388 (patch) | |
| tree | 8b760be8c4fee468172cf15c95d702974533d140 | |
| parent | 16f88dbdbffa3dc52b959706e6a311a932b51ed6 (diff) | |
[PATCH] block request batching
From: Nick Piggin <piggin@cyberone.com.au>
The following patch gets batching working the way it should.

After a process is woken up, it is allowed to allocate up to 32 requests
for 20ms. It does not stop other processes from submitting requests while
it is idle, though. This should mean fewer context switches, and allows a
batch of requests from each process to be sent to the io scheduler
instead of one request from each process.
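(For illustration only, not part of the patch: a minimal stand-alone C sketch of
that 32-request/20ms window, mirroring the ioc_batching()/ioc_set_batching()
helpers added below. The simulated jiffies counter, the assumed HZ of 100, the
trimmed io_context with only the two new fields, and the main() driver are
stand-ins for the example; the kernel uses the real jiffies and time_before(),
which also handles wraparound.)

/* Stand-alone sketch of the batching window; HZ and jiffies are simulated. */
#include <stdio.h>

#define HZ              100UL           /* assumed tick rate: HZ/50 == 2 ticks ~= 20ms */
#define BLK_BATCH_TIME  (HZ / 50UL)     /* length of the batching window */
#define BLK_BATCH_REQ   32              /* requests a "batching" process may submit */

struct io_context {
        unsigned long last_waited;      /* tick when the process was last woken */
        int nr_batch_requests;          /* requests left in the current batch */
};

static unsigned long jiffies;           /* simulated tick counter */

/* Mirrors ioc_batching(): is this process still inside its batch window? */
static int ioc_batching(struct io_context *ioc)
{
        if (!ioc)
                return 0;
        /* plain comparison here; the kernel uses time_before() for wraparound */
        return ioc->nr_batch_requests == BLK_BATCH_REQ ||
                (ioc->nr_batch_requests > 0 &&
                 jiffies < ioc->last_waited + BLK_BATCH_TIME);
}

/* Mirrors ioc_set_batching(): grant a fresh batch to a newly woken process */
static void ioc_set_batching(struct io_context *ioc)
{
        if (!ioc || ioc_batching(ioc))
                return;
        ioc->nr_batch_requests = BLK_BATCH_REQ;
        ioc->last_waited = jiffies;
}

int main(void)
{
        struct io_context ioc = { 0, 0 };
        int allocated = 0;

        ioc_set_batching(&ioc);                 /* process just woke after waiting */
        while (ioc_batching(&ioc)) {            /* gets priority request allocations */
                ioc.nr_batch_requests--;        /* as get_request() does per request */
                allocated++;
                jiffies++;                      /* pretend each request takes one tick */
        }
        printf("allocated %d requests inside the batch window\n", allocated);
        return 0;
}

With one simulated tick per request, the 20ms limit cuts the batch short after
two allocations; a process that submits faster than the clock advances gets the
full 32 requests before losing its priority.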
tiobench sequential writes are more than tripled, and random writes are nearly
doubled over mm1. In earlier tests I generally saw better CPU efficiency, but
it doesn't show here. There is still debug code to be taken out. It's also
only on UP.
                                     Avg        Maximum     Lat%    Lat%   CPU
Identifier           Rate  (CPU%)    Latency    Latency     >2s     >10s   Eff
Sequential Reads
------------------- ------ --------- ---------- ---------- ------- ------ ----
-2.5.71-mm1          11.13  3.783%       46.10   24668.01    0.84    0.02   294
+2.5.71-mm1          13.21  4.489%       37.37    5691.66    0.76    0.00   294
Random Reads
------------------- ------ --------- ---------- ---------- ------- ------ ----
-2.5.71-mm1           0.97  0.582%      519.86    6444.66   11.93    0.00   167
+2.5.71-mm1           1.01  0.604%      484.59    6604.93   10.73    0.00   167
Sequential Writes
------------------- ------ --------- ---------- ---------- ------- ------ ----
-2.5.71-mm1           4.85  4.456%       77.80   99359.39    0.18    0.13   109
+2.5.71-mm1          14.11 14.19%        10.07   22805.47    0.09    0.04    99
Random Writes
------------------- ------ --------- ---------- ---------- ------- ------ ----
-2.5.71-mm1           0.46  0.371%       14.48    6173.90    0.23    0.00   125
+2.5.71-mm1           0.86  0.744%       24.08    8753.66    0.31    0.00   115
On IBM's 8-way regression tests (ext2, tiobench, 64 threads), it decreases the
context switch rate from ~2500/s to ~140/s.
| -rw-r--r-- | drivers/block/ll_rw_blk.c | 132 |
| -rw-r--r-- | include/linux/blkdev.h | 6 |

2 files changed, 94 insertions, 44 deletions
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 8f44b5690d9a..633266ee8c87 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -51,10 +51,11 @@ static struct workqueue_struct *kblockd_workqueue;
 
 unsigned long blk_max_low_pfn, blk_max_pfn;
 
-static inline int batch_requests(struct request_queue *q)
-{
-        return q->nr_requests - min(q->nr_requests / 8, 8UL) - 1;
-}
+/* Amount of time in which a process may batch requests */
+#define BLK_BATCH_TIME  (HZ/50UL)
+
+/* Number of requests a "batching" process may submit */
+#define BLK_BATCH_REQ   32
 
 /*
  * Return the threshold (number of used requests) at which the queue is
@@ -1305,24 +1306,76 @@ static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask)
         return NULL;
 }
 
+/*
+ * ioc_batching returns true if the ioc is a valid batching request and
+ * should be given priority access to a request.
+ */
+static inline int ioc_batching(struct io_context *ioc)
+{
+        if (!ioc)
+                return 0;
+
+        return ioc->nr_batch_requests == BLK_BATCH_REQ ||
+                (ioc->nr_batch_requests > 0
+                && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
+}
+
+/*
+ * ioc_set_batching sets ioc to be a new "batcher" if it is not one
+ */
+void ioc_set_batching(struct io_context *ioc)
+{
+        if (!ioc || ioc_batching(ioc))
+                return;
+
+        ioc->nr_batch_requests = BLK_BATCH_REQ;
+        ioc->last_waited = jiffies;
+}
+
+/*
+ * A request has just been released.  Account for it, update the full and
+ * congestion status, wake up any waiters.  Called under q->queue_lock.
+ */
+static void freed_request(request_queue_t *q, int rw)
+{
+        struct request_list *rl = &q->rq;
+
+        rl->count[rw]--;
+        if (rl->count[rw] < queue_congestion_off_threshold(q))
+                clear_queue_congested(q, rw);
+        if (rl->count[rw]+1 <= q->nr_requests) {
+                smp_mb();
+                if (waitqueue_active(&rl->wait[rw]))
+                        wake_up(&rl->wait[rw]);
+                if (!waitqueue_active(&rl->wait[rw]))
+                        blk_clear_queue_full(q, rw);
+        }
+}
+
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
 
 /*
  * Get a free request, queue_lock must not be held
  */
-static struct request *
-get_request(request_queue_t *q, int rw, int gfp_mask, int force)
+static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 {
         struct request *rq = NULL;
         struct request_list *rl = &q->rq;
+        struct io_context *ioc = get_io_context();
 
         spin_lock_irq(q->queue_lock);
-        if (rl->count[rw] == q->nr_requests)
-                blk_set_queue_full(q, rw);
+        if (rl->count[rw]+1 >= q->nr_requests) {
+                if (!blk_queue_full(q, rw)) {
+                        ioc_set_batching(ioc);
+                        blk_set_queue_full(q, rw);
+                }
+        }
 
-        if (blk_queue_full(q, rw) && !force && !elv_may_queue(q, rw)) {
+        if (blk_queue_full(q, rw)
+                        && !ioc_batching(ioc) && !elv_may_queue(q, rw)) {
                 spin_unlock_irq(q->queue_lock);
                 goto out;
         }
+
         rl->count[rw]++;
         if (rl->count[rw] >= queue_congestion_on_threshold(q))
                 set_queue_congested(q, rw);
@@ -1331,20 +1384,13 @@ get_request(request_queue_t *q, int rw, int gfp_mask, int force)
         rq = blk_alloc_request(q, gfp_mask);
         if (!rq) {
                 spin_lock_irq(q->queue_lock);
-                rl->count[rw]--;
-                if (rl->count[rw] < queue_congestion_off_threshold(q))
-                        clear_queue_congested(q, rw);
-
-                if (rl->count[rw] <= batch_requests(q)) {
-                        if (waitqueue_active(&rl->wait[rw]))
-                                wake_up(&rl->wait[rw]);
-                        else
-                                blk_clear_queue_full(q, rw);
-                }
-
+                freed_request(q, rw);
                 spin_unlock_irq(q->queue_lock);
                 goto out;
         }
+
+        if (ioc_batching(ioc))
+                ioc->nr_batch_requests--;
 
         INIT_LIST_HEAD(&rq->queuelist);
@@ -1367,6 +1413,7 @@ get_request(request_queue_t *q, int rw, int gfp_mask, int force)
         rq->sense = NULL;
 
 out:
+        put_io_context(ioc);
         return rq;
 }
 
@@ -1378,7 +1425,6 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
 {
         DEFINE_WAIT(wait);
         struct request *rq;
-        int waited = 0;
 
         generic_unplug_device(q);
         do {
@@ -1387,11 +1433,15 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
                 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
                                 TASK_UNINTERRUPTIBLE);
 
-                rq = get_request(q, rw, GFP_NOIO, waited);
+                rq = get_request(q, rw, GFP_NOIO);
 
                 if (!rq) {
+                        struct io_context *ioc;
+
                         io_schedule();
-                        waited = 1;
+                        ioc = get_io_context();
+                        ioc_set_batching(ioc);
+                        put_io_context(ioc);
                 }
                 finish_wait(&rl->wait[rw], &wait);
         } while (!rq);
@@ -1408,7 +1458,7 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
         if (gfp_mask & __GFP_WAIT)
                 rq = get_request_wait(q, rw);
         else
-                rq = get_request(q, rw, gfp_mask, 0);
+                rq = get_request(q, rw, gfp_mask);
 
         return rq;
 }
@@ -1555,17 +1605,7 @@ void __blk_put_request(request_queue_t *q, struct request *req)
                 BUG_ON(!list_empty(&req->queuelist));
 
                 blk_free_request(q, req);
-
-                rl->count[rw]--;
-                if (rl->count[rw] < queue_congestion_off_threshold(q))
-                        clear_queue_congested(q, rw);
-
-                if (rl->count[rw] <= batch_requests(q)) {
-                        if (waitqueue_active(&rl->wait[rw]))
-                                wake_up(&rl->wait[rw]);
-                        else
-                                blk_clear_queue_full(q, rw);
-                }
+                freed_request(q, rw);
         }
 }
 
@@ -1808,7 +1848,7 @@ get_rq:
                 freereq = NULL;
         } else {
                 spin_unlock_irq(q->queue_lock);
-                if ((freereq = get_request(q, rw, GFP_ATOMIC, 0)) == NULL) {
+                if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
                         /*
                          * READA bit set
                          */
@@ -1852,13 +1892,12 @@ out:
                 __blk_put_request(q, freereq);
 
         if (blk_queue_plugged(q)) {
-                int nr_queued = q->rq.count[0] + q->rq.count[1];
+                int nr_queued = q->rq.count[READ] + q->rq.count[WRITE];
 
                 if (nr_queued == q->unplug_thresh)
                         __generic_unplug_device(q);
         }
         spin_unlock_irq(q->queue_lock);
-
         return 0;
 
 end_io:
@@ -1866,7 +1905,6 @@ end_io:
         return 0;
 }
 
-
 /*
  * If bio->bi_dev is a partition, remap the location
  */
@@ -2378,6 +2416,7 @@ int __init blk_dev_init(void)
         return 0;
 }
 
+static atomic_t nr_io_contexts = ATOMIC_INIT(0);
 
 /*
  * IO Context helper functions
@@ -2393,6 +2432,7 @@ void put_io_context(struct io_context *ioc)
                 if (ioc->aic && ioc->aic->dtor)
                         ioc->aic->dtor(ioc->aic);
                 kfree(ioc);
+                atomic_dec(&nr_io_contexts);
         }
 }
 
@@ -2409,7 +2449,8 @@ void exit_io_context(void)
                         ioc->aic->exit(ioc->aic);
                 put_io_context(ioc);
                 current->io_context = NULL;
-        }
+        } else
+                WARN_ON(1);
 
         local_irq_restore(flags);
 }
@@ -2432,8 +2473,11 @@ struct io_context *get_io_context(void)
         if (ret == NULL) {
                 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
                 if (ret) {
+                        atomic_inc(&nr_io_contexts);
                         atomic_set(&ret->refcount, 1);
                         ret->pid = tsk->pid;
+                        ret->last_waited = jiffies; /* doesn't matter... */
+                        ret->nr_batch_requests = 0; /* because this is 0 */
                         ret->aic = NULL;
                         tsk->io_context = ret;
                 }
@@ -2515,16 +2559,16 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 
         if (rl->count[READ] >= q->nr_requests) {
                 blk_set_queue_full(q, READ);
-        } else if (rl->count[READ] <= batch_requests(q)) {
+        } else if (rl->count[READ]+1 <= q->nr_requests) {
                 blk_clear_queue_full(q, READ);
-                wake_up_all(&rl->wait[READ]);
+                wake_up(&rl->wait[READ]);
         }
 
         if (rl->count[WRITE] >= q->nr_requests) {
                 blk_set_queue_full(q, WRITE);
-        } else if (rl->count[WRITE] <= batch_requests(q)) {
+        } else if (rl->count[WRITE]+1 <= q->nr_requests) {
                 blk_clear_queue_full(q, WRITE);
-                wake_up_all(&rl->wait[WRITE]);
+                wake_up(&rl->wait[WRITE]);
         }
 
         return ret;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 13116a7a7969..69178ca80d7d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -59,6 +59,12 @@ struct io_context {
         atomic_t refcount;
         pid_t pid;
 
+        /*
+         * For request batching
+         */
+        unsigned long last_waited; /* Time last woken after wait for request */
+        int nr_batch_requests;     /* Number of requests left in the batch */
+
         struct as_io_context *aic;
 };
