author    Andrew Morton <akpm@digeo.com>    2003-02-05 16:56:31 -0800
committer Linus Torvalds <torvalds@home.transmeta.com>    2003-02-05 16:56:31 -0800
commit    00c8e791cba1bb88db8a8fd73106c28fdbab5716 (patch)
tree      d1242ea5923a8b6fba8b95c909b8670b260f261e
parent    c5070032bb8db845535ac8c85d45884138a08a4f (diff)
[PATCH] self-unplugging request queues
The patch teaches a queue to unplug itself: a) if it has four requests, or b) if it has had plugged requests for 3 milliseconds. These numbers may need to be tuned, although doing so doesn't seem to make much difference; 10 msecs works OK, so HZ=100 machines will be fine.

Instrumentation shows that about 5-10% of requests were started due to the three-millisecond timeout (during a kernel compile). That's somewhat significant: it means that the kernel is leaving stuff in the queue, plugged, for too long. This testing was with a uniprocessor preemptible kernel, which is particularly vulnerable to unplug latency (submit some IO, get preempted before the unplug).

This patch permits the removal of a lot of rather lame unplugging in page reclaim and in the writeback code, which kicks the queues (globally!) every four megabytes to get writeback underway. This patch doesn't use blk_run_queues(); it is able to kick just the particular queue.

The patch is not expected to make much difference really, except for AIO. AIO needs a blk_run_queues() in its io_submit() call, for each request. This means that AIO has to disable plugging altogether, unless something like this patch does it for it. It means that AIO will unplug *all* queues in the machine for every io_submit(). Even against a socket!

This patch was tested by disabling blk_run_queues() completely. The system ran OK.

The 3 milliseconds may be too long. It's OK for the heavy writeback code, but AIO may want less. Or maybe AIO really wants zero (ie: disable plugging). If that is so, we need new code paths by which AIO can communicate the "immediate unplug" information - a global unplug is not good.

To minimise unplug latency due to user CPU load, this patch gives keventd `nice -10'. This is of course completely arbitrary. Really, I think keventd should be SCHED_RR/MAX_RT_PRIO-1, as it has been in -aa kernels for ages.
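The 3 ms delay is stored in jiffies, so its effective value depends on HZ, which is why the message above notes that HZ=100 machines will see 10 msecs. A minimal sketch of the conversion the patch does in blk_queue_make_request() — the helper name unplug_delay_jiffies() is invented here for illustration; the patch open-codes this:

#include <linux/param.h>	/* HZ */

/*
 * Round 3 ms down to jiffies, then clamp to at least one jiffy so
 * the unplug timer still fires on low-HZ configurations:
 *
 *   HZ=1000: (3 * 1000) / 1000 = 3 jiffies -> 3 ms
 *   HZ=100:  (3 * 100) / 1000  = 0, clamped to 1 jiffy -> 10 ms
 */
static unsigned long unplug_delay_jiffies(void)
{
	unsigned long delay = (3 * HZ) / 1000;	/* 3 milliseconds */

	if (delay == 0)
		delay = 1;
	return delay;
}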
-rw-r--r--  drivers/block/ll_rw_blk.c  40
-rw-r--r--  include/linux/blkdev.h     10
-rw-r--r--  kernel/workqueue.c          1
3 files changed, 51 insertions(+), 0 deletions(-)
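For orientation before the diff: the timer handler runs in softirq context, where the queue's request_fn cannot safely be run, so the patch bounces the unplug through keventd with a work item. A minimal sketch of that wiring, assembled in one place from the hunks below — blk_unplug_setup() is an invented name; the patch splits this setup between blk_queue_make_request() and blk_init_queue():

#include <linux/timer.h>
#include <linux/workqueue.h>

/* Timer handler: softirq context, so just punt to keventd. */
static void blk_unplug_timeout(unsigned long data)
{
	request_queue_t *q = (request_queue_t *)data;

	schedule_work(&q->unplug_work);
}

/* Work handler: keventd's process context, where it is safe to
 * take the queue lock and call the request_fn. */
static void blk_unplug_work(void *data)
{
	generic_unplug_device(data);
}

static void blk_unplug_setup(request_queue_t *q)
{
	init_timer(&q->unplug_timer);
	q->unplug_timer.function = blk_unplug_timeout;
	q->unplug_timer.data = (unsigned long)q;
	/* old three-argument INIT_WORK() carries the data pointer */
	INIT_WORK(&q->unplug_work, blk_unplug_work, q);
}

blk_plug_device() then arms the timer with mod_timer(&q->unplug_timer, jiffies + q->unplug_delay) and blk_remove_plug() disarms it, so a queue that is unplugged normally never takes the timeout path.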
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index f8a4e7a81f4b..e13d0bbca144 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -27,6 +27,8 @@
#include <linux/completion.h>
#include <linux/slab.h>
+static void blk_unplug_work(void *data);
+
/*
* For the allocated request tables
*/
@@ -237,6 +239,14 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
blk_queue_hardsect_size(q, 512);
blk_queue_dma_alignment(q, 511);
+ q->unplug_thresh = 4; /* hmm */
+ q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */
+ if (q->unplug_delay == 0)
+ q->unplug_delay = 1;
+
+ init_timer(&q->unplug_timer);
+ INIT_WORK(&q->unplug_work, blk_unplug_work, q);
+
/*
* by default assume old behaviour and bounce for any highmem page
*/
@@ -960,6 +970,7 @@ void blk_plug_device(request_queue_t *q)
if (!blk_queue_plugged(q)) {
spin_lock(&blk_plug_lock);
list_add_tail(&q->plug_list, &blk_plug_list);
+ mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
spin_unlock(&blk_plug_lock);
}
}
@@ -974,6 +985,7 @@ int blk_remove_plug(request_queue_t *q)
if (blk_queue_plugged(q)) {
spin_lock(&blk_plug_lock);
list_del_init(&q->plug_list);
+ del_timer(&q->unplug_timer);
spin_unlock(&blk_plug_lock);
return 1;
}
@@ -992,6 +1004,8 @@ static inline void __generic_unplug_device(request_queue_t *q)
if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
return;
+ del_timer(&q->unplug_timer);
+
/*
* was plugged, fire request_fn if queue has stuff to do
*/
@@ -1020,6 +1034,18 @@ void generic_unplug_device(void *data)
spin_unlock_irq(q->queue_lock);
}
+static void blk_unplug_work(void *data)
+{
+ generic_unplug_device(data);
+}
+
+static void blk_unplug_timeout(unsigned long data)
+{
+ request_queue_t *q = (request_queue_t *)data;
+
+ schedule_work(&q->unplug_work);
+}
+
/**
* blk_start_queue - restart a previously stopped queue
* @q: The &request_queue_t in question
@@ -1164,6 +1190,9 @@ void blk_cleanup_queue(request_queue_t * q)
count -= __blk_cleanup_queue(&q->rq[READ]);
count -= __blk_cleanup_queue(&q->rq[WRITE]);
+ del_timer_sync(&q->unplug_timer);
+ flush_scheduled_work();
+
if (count)
printk("blk_cleanup_queue: leaked requests (%d)\n", count);
@@ -1269,6 +1298,9 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
blk_queue_make_request(q, __make_request);
blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
+ q->unplug_timer.function = blk_unplug_timeout;
+ q->unplug_timer.data = (unsigned long)q;
+
blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
@@ -1811,7 +1843,15 @@ get_rq:
out:
if (freereq)
__blk_put_request(q, freereq);
+
+ if (blk_queue_plugged(q)) {
+ int nr_queued = (queue_nr_requests - q->rq[0].count) +
+ (queue_nr_requests - q->rq[1].count);
+ if (nr_queued == q->unplug_thresh)
+ __generic_unplug_device(q);
+ }
spin_unlock_irq(q->queue_lock);
+
return 0;
end_io:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c599ea36233b..82766b7e60b0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -4,6 +4,8 @@
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
@@ -188,6 +190,14 @@ struct request_queue
unplug_fn *unplug_fn;
merge_bvec_fn *merge_bvec_fn;
+ /*
+ * Auto-unplugging state
+ */
+ struct timer_list unplug_timer;
+ int unplug_thresh; /* After this many requests */
+ unsigned long unplug_delay; /* After this many jiffies */
+ struct work_struct unplug_work;
+
struct backing_dev_info backing_dev_info;
/*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 156583c7dbf7..fb10d360c436 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -177,6 +177,7 @@ static int worker_thread(void *__startup)
current->flags |= PF_IOTHREAD;
cwq->thread = current;
+ set_user_nice(current, -10);
set_cpus_allowed(current, 1UL << cpu);
spin_lock_irq(&current->sig->siglock);
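The final hunk gives keventd `nice -10'. If keventd were instead made SCHED_RR/MAX_RT_PRIO-1 as the commit message suggests, the change in worker_thread() might look like the sketch below — hypothetical: it uses the in-kernel sched_setscheduler() helper from later trees rather than anything in this patch:

#include <linux/sched.h>

/* Hypothetical replacement for set_user_nice(current, -10): run the
 * worker at the top realtime round-robin priority, as the -aa kernels
 * reportedly did for keventd. Illustrative only. */
static void worker_set_rt(struct task_struct *p)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };

	sched_setscheduler(p, SCHED_RR, &param);
}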