 arch/i386/boot/compressed/misc.c |   2
 arch/i386/kernel/smpboot.c       |   6
 arch/i386/kernel/sys_i386.c      |   3
 drivers/block/Makefile           |   4
 drivers/block/deadline-iosched.c | 557
 drivers/block/elevator.c         | 118
 drivers/block/ll_rw_blk.c        |  19
 drivers/block/loop.c             |  14
 drivers/ide/pci/cy82c693.c       |  29
 drivers/ide/pci/cy82c693.h       |   8
 drivers/ide/setup-pci.c          |   1
 drivers/pnp/pnpbios_proc.c       |  14
 drivers/scsi/3w-xxxx.c           |  22
 drivers/scsi/3w-xxxx.h           |  11
 fs/buffer.c                      |  20
 fs/dcache.c                      |  30
 fs/dquot.c                       |  19
 fs/inode.c                       |  29
 fs/locks.c                       |  14
 include/asm-i386/io.h            |   1
 include/asm-i386/semaphore.h     |   4
 include/linux/dcache.h           |   2
 include/linux/elevator.h         |   8
 include/linux/kernel.h           |   7
 include/linux/mm.h               |   1
 include/linux/pagemap.h          |   8
 include/linux/rwsem.h            |   2
 include/linux/sched.h            |   5
 include/linux/wait.h             |  26
 kernel/exit.c                    |  46
 kernel/fork.c                    |  46
 kernel/ksyms.c                   |   8
 kernel/pid.c                     |   9
 kernel/sched.c                   |  17
 kernel/timer.c                   |  14
 mm/filemap.c                     |  55
 mm/mprotect.c                    |   4
 mm/page_alloc.c                  |  14
 mm/pdflush.c                     |  36
 mm/slab.c                        |  11
 mm/vmscan.c                      |  51
 41 files changed, 935 insertions(+), 360 deletions(-)
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index 42ce2febe8b7..fcec73a7e379 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -121,7 +121,7 @@ static int vidport;
static int lines, cols;
#ifdef CONFIG_MULTIQUAD
-static void * const xquad_portio = NULL;
+static void * xquad_portio = NULL;
#endif
#include "../../../../lib/inflate.c"
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 8a04f3d2c8aa..9d513dc1ceb2 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -1060,11 +1060,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
if (clustered_apic_mode && (numnodes > 1)) {
printk("Remapping cross-quad port I/O for %d quads\n",
numnodes);
+ xquad_portio = ioremap (XQUAD_PORTIO_BASE,
+ numnodes * XQUAD_PORTIO_QUAD);
printk("xquad_portio vaddr 0x%08lx, len %08lx\n",
(u_long) xquad_portio,
- (u_long) numnodes * XQUAD_PORTIO_LEN);
- xquad_portio = ioremap (XQUAD_PORTIO_BASE,
- numnodes * XQUAD_PORTIO_LEN);
+ (u_long) numnodes * XQUAD_PORTIO_QUAD);
}
/*
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c
index f7042004ead4..69b1805151ad 100644
--- a/arch/i386/kernel/sys_i386.c
+++ b/arch/i386/kernel/sys_i386.c
@@ -272,10 +272,9 @@ get_addr(unsigned long addr, unsigned long len)
return -ENOMEM;
if (!vma || ((addr + len) < vma->vm_start))
goto found_addr;
- addr = vma->vm_end;
+ addr = HPAGE_ALIGN(vma->vm_end);
}
found_addr:
- addr = HPAGE_ALIGN(addr);
return addr;
}
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 2b40242f3ab2..eff7ee947ea7 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -9,9 +9,9 @@
#
export-objs := elevator.o ll_rw_blk.o loop.o genhd.o acsi.o \
- block_ioctl.o
+ block_ioctl.o deadline-iosched.o
-obj-y := elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o
+obj-y := elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o deadline-iosched.o
obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c
new file mode 100644
index 000000000000..a32d24ef7c50
--- /dev/null
+++ b/drivers/block/deadline-iosched.c
@@ -0,0 +1,557 @@
+/*
+ * linux/drivers/block/deadline-iosched.c
+ *
+ * Deadline i/o scheduler.
+ *
+ * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
+ */
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/bio.h>
+#include <linux/blk.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/hash.h>
+
+/*
+ * feel free to try other values :-). read_expire value is the timeout for
+ * reads, our goal is to start a request "around" the time when it expires.
+ * fifo_batch is how many steps along the sorted list we will take when the
+ * front fifo request expires.
+ */
+static int read_expire = HZ / 2; /* 500ms start timeout */
+static int fifo_batch = 64; /* 4 seeks, or 64 contig */
+static int seek_cost = 16; /* seek is 16 times more expensive */
+
+/*
+ * how many times reads are allowed to starve writes
+ */
+static int writes_starved = 2;
+
+static const int deadline_hash_shift = 8;
+#define DL_HASH_BLOCK(sec) ((sec) >> 3)
+#define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
+#define DL_HASH_ENTRIES (1 << deadline_hash_shift)
+
+#define DL_INVALIDATE_HASH(dd) \
+ do { \
+ if (!++(dd)->hash_valid_count) \
+ (dd)->hash_valid_count = 1; \
+ } while (0)
+
+struct deadline_data {
+ /*
+ * run time data
+ */
+ struct list_head sort_list[2]; /* sorted lists */
+ struct list_head read_fifo; /* fifo list */
+ struct list_head *dispatch; /* driver dispatch queue */
+ struct list_head *hash; /* request hash */
+ sector_t last_sector; /* last sector sent to drive */
+ unsigned long hash_valid_count; /* barrier hash count */
+ unsigned int starved; /* writes starved */
+
+ /*
+ * settings that change how the i/o scheduler behaves
+ */
+ unsigned int fifo_batch;
+ unsigned long read_expire;
+ unsigned int seek_cost;
+ unsigned int writes_starved;
+};
+
+/*
+ * pre-request data.
+ */
+struct deadline_rq {
+ struct list_head fifo;
+ struct list_head hash;
+ unsigned long hash_valid_count;
+ struct request *request;
+ unsigned long expires;
+};
+
+static kmem_cache_t *drq_pool;
+
+#define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private)
+
+/*
+ * rq hash
+ */
+static inline void __deadline_del_rq_hash(struct deadline_rq *drq)
+{
+ drq->hash_valid_count = 0;
+ list_del_init(&drq->hash);
+}
+
+#define ON_HASH(drq) (drq)->hash_valid_count
+static inline void deadline_del_rq_hash(struct deadline_rq *drq)
+{
+ if (ON_HASH(drq))
+ __deadline_del_rq_hash(drq);
+}
+
+static inline void
+deadline_add_rq_hash(struct deadline_data *dd, struct deadline_rq *drq)
+{
+ struct request *rq = drq->request;
+
+ BUG_ON(ON_HASH(drq));
+
+ drq->hash_valid_count = dd->hash_valid_count;
+ list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq->sector + rq->nr_sectors)]);
+}
+
+#define list_entry_hash(ptr) list_entry((ptr), struct deadline_rq, hash)
+static struct request *
+deadline_find_hash(struct deadline_data *dd, sector_t offset)
+{
+ struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
+ struct list_head *entry, *next = hash_list->next;
+ struct deadline_rq *drq;
+ struct request *rq = NULL;
+
+ while ((entry = next) != hash_list) {
+ next = entry->next;
+ drq = list_entry_hash(entry);
+
+ BUG_ON(!drq->hash_valid_count);
+
+ if (!rq_mergeable(drq->request)
+ || drq->hash_valid_count != dd->hash_valid_count) {
+ __deadline_del_rq_hash(drq);
+ continue;
+ }
+
+ if (drq->request->sector + drq->request->nr_sectors == offset) {
+ rq = drq->request;
+ break;
+ }
+ }
+
+ return rq;
+}
+
+static int
+deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
+{
+ struct deadline_data *dd = q->elevator.elevator_data;
+ const int data_dir = bio_data_dir(bio);
+ struct list_head *entry, *sort_list;
+ struct deadline_rq *drq;
+ struct request *__rq;
+ int ret = ELEVATOR_NO_MERGE;
+
+ /*
+ * try last_merge to avoid going to hash
+ */
+ ret = elv_try_last_merge(q, req, bio);
+ if (ret != ELEVATOR_NO_MERGE)
+ goto out;
+
+ /*
+ * see if the merge hash can satisfy a back merge
+ */
+ if ((__rq = deadline_find_hash(dd, bio->bi_sector))) {
+ BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
+
+ if (elv_rq_merge_ok(__rq, bio)) {
+ *req = __rq;
+ q->last_merge = &__rq->queuelist;
+ ret = ELEVATOR_BACK_MERGE;
+ goto out_ret;
+ }
+ }
+
+ entry = sort_list = &dd->sort_list[data_dir];
+ while ((entry = entry->prev) != sort_list) {
+ __rq = list_entry_rq(entry);
+ drq = RQ_DATA(__rq);
+
+ BUG_ON(__rq->flags & REQ_STARTED);
+
+ if (!(__rq->flags & REQ_CMD))
+ continue;
+
+ if (!*req && bio_rq_in_between(bio, __rq, sort_list))
+ *req = __rq;
+
+ if (__rq->flags & REQ_BARRIER)
+ break;
+
+ /*
+ * checking for a front merge, hash will miss those
+ */
+ if (__rq->sector - bio_sectors(bio) == bio->bi_sector) {
+ ret = elv_try_merge(__rq, bio);
+ if (ret != ELEVATOR_NO_MERGE) {
+ *req = __rq;
+ q->last_merge = &__rq->queuelist;
+ break;
+ }
+ }
+ }
+
+out:
+ if (ret != ELEVATOR_NO_MERGE) {
+ struct deadline_rq *drq = RQ_DATA(*req);
+
+ deadline_del_rq_hash(drq);
+ deadline_add_rq_hash(dd, drq);
+ }
+out_ret:
+ return ret;
+}
+
+static void
+deadline_merge_request(request_queue_t *q, struct request *req, struct request *next)
+{
+ struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_rq *drq = RQ_DATA(req);
+ struct deadline_rq *dnext = RQ_DATA(next);
+
+ BUG_ON(!drq);
+ BUG_ON(!dnext);
+
+ deadline_del_rq_hash(drq);
+ deadline_add_rq_hash(dd, drq);
+
+ /*
+ * if dnext expires before drq, assign its expire time to drq
+ * and move into dnext position (dnext will be deleted) in fifo
+ */
+ if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
+ if (time_before(dnext->expires, drq->expires)) {
+ list_move(&drq->fifo, &dnext->fifo);
+ drq->expires = dnext->expires;
+ }
+ }
+}
+
+/*
+ * move request from sort list to dispatch queue. maybe remove from rq hash
+ * here too?
+ */
+static inline void
+deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
+{
+ struct deadline_rq *drq = RQ_DATA(rq);
+
+ list_move_tail(&rq->queuelist, dd->dispatch);
+ list_del_init(&drq->fifo);
+}
+
+/*
+ * move along sort list and move entries to dispatch queue, starting from rq
+ */
+static void deadline_move_requests(struct deadline_data *dd, struct request *rq)
+{
+ struct list_head *sort_head = &dd->sort_list[rq_data_dir(rq)];
+ sector_t last_sec = dd->last_sector;
+ int batch_count = dd->fifo_batch;
+
+ do {
+ struct list_head *nxt = rq->queuelist.next;
+
+ /*
+ * take it off the sort and fifo list, move
+ * to dispatch queue
+ */
+ deadline_move_to_dispatch(dd, rq);
+
+ if (rq->sector == last_sec)
+ batch_count--;
+ else
+ batch_count -= dd->seek_cost;
+
+ if (nxt == sort_head)
+ break;
+
+ last_sec = rq->sector + rq->nr_sectors;
+ rq = list_entry_rq(nxt);
+ } while (batch_count > 0);
+}
+
+/*
+ * returns 0 if there are no expired reads on the fifo, 1 otherwise
+ */
+#define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo)
+static inline int deadline_check_fifo(struct deadline_data *dd)
+{
+ struct deadline_rq *drq;
+
+ if (list_empty(&dd->read_fifo))
+ return 0;
+
+ drq = list_entry_fifo(dd->read_fifo.next);
+ if (time_before(jiffies, drq->expires))
+ return 0;
+
+ return 1;
+}
+
+static struct request *deadline_next_request(request_queue_t *q)
+{
+ struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_rq *drq;
+ struct list_head *nxt;
+ struct request *rq;
+ int writes;
+
+ /*
+ * if still requests on the dispatch queue, just grab the first one
+ */
+ if (!list_empty(&q->queue_head)) {
+dispatch:
+ rq = list_entry_rq(q->queue_head.next);
+ dd->last_sector = rq->sector + rq->nr_sectors;
+ return rq;
+ }
+
+ writes = !list_empty(&dd->sort_list[WRITE]);
+
+ /*
+ * if we have expired entries on the fifo list, move some to dispatch
+ */
+ if (deadline_check_fifo(dd)) {
+ if (writes && (dd->starved++ >= dd->writes_starved))
+ goto dispatch_writes;
+
+ nxt = dd->read_fifo.next;
+ drq = list_entry_fifo(nxt);
+ deadline_move_requests(dd, drq->request);
+ goto dispatch;
+ }
+
+ if (!list_empty(&dd->sort_list[READ])) {
+ if (writes && (dd->starved++ >= dd->writes_starved))
+ goto dispatch_writes;
+
+ nxt = dd->sort_list[READ].next;
+ deadline_move_requests(dd, list_entry_rq(nxt));
+ goto dispatch;
+ }
+
+ /*
+ * either there are no reads expired or on sort list, or the reads
+ * have starved writes for too long. dispatch some writes
+ */
+ if (writes) {
+dispatch_writes:
+ nxt = dd->sort_list[WRITE].next;
+ deadline_move_requests(dd, list_entry_rq(nxt));
+ dd->starved = 0;
+ goto dispatch;
+ }
+
+ BUG_ON(!list_empty(&dd->sort_list[READ]));
+ BUG_ON(writes);
+ return NULL;
+}
+
+static void
+deadline_add_request(request_queue_t *q, struct request *rq, struct list_head *insert_here)
+{
+ struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_rq *drq = RQ_DATA(rq);
+ const int data_dir = rq_data_dir(rq);
+
+ /*
+ * flush hash on barrier insert, so as not to allow merges before a
+ * barrier.
+ */
+ if (unlikely(rq->flags & REQ_BARRIER)) {
+ DL_INVALIDATE_HASH(dd);
+ q->last_merge = NULL;
+ }
+
+ /*
+ * add to sort list
+ */
+ if (!insert_here)
+ insert_here = dd->sort_list[data_dir].prev;
+
+ list_add(&rq->queuelist, insert_here);
+
+ if (unlikely(!(rq->flags & REQ_CMD)))
+ return;
+
+ if (rq_mergeable(rq)) {
+ deadline_add_rq_hash(dd, drq);
+
+ if (!q->last_merge)
+ q->last_merge = &rq->queuelist;
+ }
+
+ if (data_dir == READ) {
+ /*
+ * set expire time and add to fifo list
+ */
+ drq->expires = jiffies + dd->read_expire;
+ list_add_tail(&drq->fifo, &dd->read_fifo);
+ }
+}
+
+static void deadline_remove_request(request_queue_t *q, struct request *rq)
+{
+ struct deadline_rq *drq = RQ_DATA(rq);
+
+ if (drq) {
+ list_del_init(&drq->fifo);
+ deadline_del_rq_hash(drq);
+ }
+}
+
+static int deadline_queue_empty(request_queue_t *q)
+{
+ struct deadline_data *dd = q->elevator.elevator_data;
+
+ if (!list_empty(&q->queue_head) || !list_empty(&dd->sort_list[READ])
+ || !list_empty(&dd->sort_list[WRITE]))
+ return 0;
+
+ BUG_ON(!list_empty(&dd->read_fifo));
+ return 1;
+}
+
+static struct list_head *
+deadline_get_sort_head(request_queue_t *q, struct request *rq)
+{
+ struct deadline_data *dd = q->elevator.elevator_data;
+
+ return &dd->sort_list[rq_data_dir(rq)];
+}
+
+static void deadline_exit(request_queue_t *q, elevator_t *e)
+{
+ struct deadline_data *dd = e->elevator_data;
+ struct deadline_rq *drq;
+ struct request *rq;
+ int i;
+
+ BUG_ON(!list_empty(&dd->read_fifo));
+ BUG_ON(!list_empty(&dd->sort_list[READ]));
+ BUG_ON(!list_empty(&dd->sort_list[WRITE]));
+
+ for (i = READ; i <= WRITE; i++) {
+ struct request_list *rl = &q->rq[i];
+ struct list_head *entry = &rl->free;
+
+ if (list_empty(&rl->free))
+ continue;
+
+ while ((entry = entry->next) != &rl->free) {
+ rq = list_entry_rq(entry);
+
+ if ((drq = RQ_DATA(rq)) == NULL)
+ continue;
+
+ rq->elevator_private = NULL;
+ kmem_cache_free(drq_pool, drq);
+ }
+ }
+
+ kfree(dd->hash);
+ kfree(dd);
+}
+
+/*
+ * initialize elevator private data (deadline_data), and alloc a drq for
+ * each request on the free lists
+ */
+static int deadline_init(request_queue_t *q, elevator_t *e)
+{
+ struct deadline_data *dd;
+ struct deadline_rq *drq;
+ struct request *rq;
+ int i, ret = 0;
+
+ if (!drq_pool)
+ return -ENOMEM;
+
+ dd = kmalloc(sizeof(*dd), GFP_KERNEL);
+ if (!dd)
+ return -ENOMEM;
+ memset(dd, 0, sizeof(*dd));
+
+ dd->hash = kmalloc(sizeof(struct list_head)*DL_HASH_ENTRIES,GFP_KERNEL);
+ if (!dd->hash) {
+ kfree(dd);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < DL_HASH_ENTRIES; i++)
+ INIT_LIST_HEAD(&dd->hash[i]);
+
+ INIT_LIST_HEAD(&dd->read_fifo);
+ INIT_LIST_HEAD(&dd->sort_list[READ]);
+ INIT_LIST_HEAD(&dd->sort_list[WRITE]);
+ dd->dispatch = &q->queue_head;
+ dd->fifo_batch = fifo_batch;
+ dd->read_expire = read_expire;
+ dd->seek_cost = seek_cost;
+ dd->hash_valid_count = 1;
+ dd->writes_starved = writes_starved;
+ e->elevator_data = dd;
+
+ for (i = READ; i <= WRITE; i++) {
+ struct request_list *rl = &q->rq[i];
+ struct list_head *entry = &rl->free;
+
+ if (list_empty(&rl->free))
+ continue;
+
+ while ((entry = entry->next) != &rl->free) {
+ rq = list_entry_rq(entry);
+
+ drq = kmem_cache_alloc(drq_pool, GFP_KERNEL);
+ if (!drq) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ memset(drq, 0, sizeof(*drq));
+ INIT_LIST_HEAD(&drq->fifo);
+ INIT_LIST_HEAD(&drq->hash);
+ drq->request = rq;
+ rq->elevator_private = drq;
+ }
+ }
+
+ if (ret)
+ deadline_exit(q, e);
+
+ return ret;
+}
+
+static int __init deadline_slab_setup(void)
+{
+ drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+ if (!drq_pool)
+ panic("deadline: can't init slab pool\n");
+
+ return 0;
+}
+
+module_init(deadline_slab_setup);
+
+elevator_t iosched_deadline = {
+ .elevator_merge_fn = deadline_merge,
+ .elevator_merge_req_fn = deadline_merge_request,
+ .elevator_next_req_fn = deadline_next_request,
+ .elevator_add_req_fn = deadline_add_request,
+ .elevator_remove_req_fn = deadline_remove_request,
+ .elevator_queue_empty_fn = deadline_queue_empty,
+ .elevator_get_sort_head_fn = deadline_get_sort_head,
+ .elevator_init_fn = deadline_init,
+ .elevator_exit_fn = deadline_exit,
+};
+
+EXPORT_SYMBOL(iosched_deadline);
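
To make the fifo_batch/seek_cost accounting above concrete: with the defaults, one dispatch batch covers up to 64 contiguous requests or 4 seeking ones ("4 seeks, or 64 contig", as the comment puts it). Below is a user-space sketch of the same budget arithmetic as deadline_move_requests() (the request type and the lists are simplified stand-ins, not the kernel's):

#include <stdio.h>

#define FIFO_BATCH 64	/* default fifo_batch */
#define SEEK_COST  16	/* default seek_cost */

struct req { unsigned long sector, nr_sectors; };

/* a contiguous request costs 1 from the budget, a seeking one costs SEEK_COST */
static int batch_requests(struct req *rq, int nr, unsigned long last_sec)
{
	int budget = FIFO_BATCH, moved = 0, i;

	for (i = 0; i < nr && budget > 0; i++) {
		budget -= (rq[i].sector == last_sec) ? 1 : SEEK_COST;
		last_sec = rq[i].sector + rq[i].nr_sectors;
		moved++;
	}
	return moved;
}

int main(void)
{
	struct req contig[8], scattered[8];
	int i;

	for (i = 0; i < 8; i++) {
		contig[i].sector = i * 8;		/* back to back */
		contig[i].nr_sectors = 8;
		scattered[i].sector = (i + 1) * 1000;	/* every request seeks */
		scattered[i].nr_sectors = 8;
	}
	printf("contiguous: %d moved\n", batch_requests(contig, 8, 0));		/* 8 */
	printf("scattered:  %d moved\n", batch_requests(scattered, 8, 0));	/* 4 */
	return 0;
}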
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
index 10729a1f0c1c..68f2ded9d86e 100644
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -157,114 +157,6 @@ inline int elv_try_last_merge(request_queue_t *q, struct request **req,
return ret;
}
-static int bio_rq_before(struct bio *bio, struct request *rq)
-{
- if (!kdev_same(to_kdev_t(bio->bi_bdev->bd_dev), rq->rq_dev))
- return 0;
- return bio->bi_sector < rq->sector;
-}
-
-/*
- * elevator_linux starts here
- */
-int elevator_linus_merge(request_queue_t *q, struct request **req,
- struct bio *bio)
-{
- struct list_head *entry, *good;
- struct request *__rq;
- int ret;
-
- if ((ret = elv_try_last_merge(q, req, bio)))
- return ret;
-
- entry = &q->queue_head;
- good = &q->queue_head;
- ret = ELEVATOR_NO_MERGE;
- while ((entry = entry->prev) != &q->queue_head) {
- __rq = list_entry_rq(entry);
-
- if (__rq->flags & (REQ_BARRIER | REQ_STARTED))
- break;
- if (!(__rq->flags & REQ_CMD))
- break;
-
- if (bio_data_dir(bio) != rq_data_dir(__rq)) {
- if (bio_data_dir(bio) == WRITE)
- break;
- good = entry->prev;
- continue;
- }
-
- ret = elv_try_merge(__rq, bio);
- if (ret) {
- *req = __rq;
- q->last_merge = &__rq->queuelist;
- return ret;
- }
-
- if (bio_rq_before(bio, __rq))
- good = entry->prev;
-
- }
-
- if (good != &q->queue_head)
- *req = list_entry_rq(good);
-
- return ELEVATOR_NO_MERGE;
-}
-
-void elevator_linus_merge_req(request_queue_t *q, struct request *req,
- struct request *next)
-{
- if (elv_linus_sequence(next) < elv_linus_sequence(req))
- elv_linus_sequence(req) = elv_linus_sequence(next);
-}
-
-void elevator_linus_add_request(request_queue_t *q, struct request *rq,
- struct list_head *insert_here)
-{
- elevator_t *e = &q->elevator;
- int lat = 0, *latency = e->elevator_data;
-
- if (!insert_here)
- insert_here = q->queue_head.prev;
-
- if (!(rq->flags & REQ_BARRIER))
- lat = latency[rq_data_dir(rq)];
-
- elv_linus_sequence(rq) = lat;
-
- list_add(&rq->queuelist, insert_here);
-
- /*
- * new merges must not precede this barrier
- */
- if (rq->flags & REQ_BARRIER)
- q->last_merge = NULL;
- else if (!q->last_merge)
- q->last_merge = &rq->queuelist;
-}
-
-int elevator_linus_init(request_queue_t *q, elevator_t *e)
-{
- int *latency;
-
- latency = kmalloc(2 * sizeof(int), GFP_KERNEL);
- if (!latency)
- return -ENOMEM;
-
- latency[READ] = 1024;
- latency[WRITE] = 2048;
-
- e->elevator_data = latency;
- return 0;
-}
-
-void elevator_linus_exit(request_queue_t *q, elevator_t *e)
-{
- kfree(e->elevator_data);
-}
-
/*
* elevator noop
*
@@ -442,15 +334,6 @@ inline struct list_head *elv_get_sort_head(request_queue_t *q,
return &q->queue_head;
}
-elevator_t elevator_linus = {
- elevator_merge_fn: elevator_linus_merge,
- elevator_merge_req_fn: elevator_linus_merge_req,
- elevator_next_req_fn: elevator_noop_next_request,
- elevator_add_req_fn: elevator_linus_add_request,
- elevator_init_fn: elevator_linus_init,
- elevator_exit_fn: elevator_linus_exit,
-};
-
elevator_t elevator_noop = {
elevator_merge_fn: elevator_noop_merge,
elevator_next_req_fn: elevator_noop_next_request,
@@ -459,7 +342,6 @@ elevator_t elevator_noop = {
module_init(elevator_global_init);
-EXPORT_SYMBOL(elevator_linus);
EXPORT_SYMBOL(elevator_noop);
EXPORT_SYMBOL(__elv_add_request);
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 605f474d2f16..a2595200d838 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -1175,7 +1175,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
if (blk_init_free_list(q))
return -ENOMEM;
- if ((ret = elevator_init(q, &q->elevator, elevator_linus))) {
+ if ((ret = elevator_init(q, &q->elevator, iosched_deadline))) {
blk_cleanup_queue(q);
return ret;
}
@@ -1233,24 +1233,23 @@ static struct request *get_request(request_queue_t *q, int rw)
*/
static struct request *get_request_wait(request_queue_t *q, int rw)
{
- DECLARE_WAITQUEUE(wait, current);
+ DEFINE_WAIT(wait);
struct request_list *rl = &q->rq[rw];
struct request *rq;
spin_lock_prefetch(q->queue_lock);
generic_unplug_device(q);
- add_wait_queue_exclusive(&rl->wait, &wait);
do {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ prepare_to_wait_exclusive(&rl->wait, &wait,
+ TASK_UNINTERRUPTIBLE);
if (!rl->count)
schedule();
+ finish_wait(&rl->wait, &wait);
spin_lock_irq(q->queue_lock);
rq = get_request(q, rw);
spin_unlock_irq(q->queue_lock);
} while (rq == NULL);
- remove_wait_queue(&rl->wait, &wait);
- current->state = TASK_RUNNING;
return rq;
}
@@ -1460,18 +1459,16 @@ void blk_put_request(struct request *req)
*/
void blk_congestion_wait(int rw, long timeout)
{
- DECLARE_WAITQUEUE(wait, current);
+ DEFINE_WAIT(wait);
struct congestion_state *cs = &congestion_states[rw];
if (atomic_read(&cs->nr_congested_queues) == 0)
return;
blk_run_queues();
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&cs->wqh, &wait);
+ prepare_to_wait(&cs->wqh, &wait, TASK_UNINTERRUPTIBLE);
if (atomic_read(&cs->nr_congested_queues) != 0)
schedule_timeout(timeout);
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&cs->wqh, &wait);
+ finish_wait(&cs->wqh, &wait);
}
/*
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ccfa6f776ef0..d55beac14697 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -157,18 +157,12 @@ struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
#define MAX_DISK_SIZE 1024*1024*1024
-static unsigned long
-compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry)
-{
- loff_t size = lo_dentry->d_inode->i_mapping->host->i_size;
- return (size - lo->lo_offset) >> BLOCK_SIZE_BITS;
-}
-
static void figure_loop_size(struct loop_device *lo)
{
- set_capacity(disks + lo->lo_number, compute_loop_size(lo,
- lo->lo_backing_file->f_dentry));
-
+ loff_t size = lo->lo_backing_file->f_dentry->d_inode->i_size;
+
+ set_capacity(disks + lo->lo_number,
+ (size - lo->lo_offset) >> 9);
}
static inline int lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
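
The new figure_loop_size() reads the size straight off the backing inode and converts to 512-byte sectors with the >> 9, which is the unit set_capacity() counts in; the old compute_loop_size() shifted by BLOCK_SIZE_BITS (1024-byte units). Worked example: a 1 GiB backing file with lo_offset = 4096 gives (1073741824 - 4096) >> 9 = 2097144 sectors.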
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index 06b5495a79fd..cbb0abb97407 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -338,6 +338,9 @@ static void cy82c693_tune_drive (ide_drive_t *drive, u8 pio)
*/
unsigned int __init init_chipset_cy82c693(struct pci_dev *dev, const char *name)
{
+ if (PCI_FUNC(dev->devfn) != 1)
+ return 0;
+
#ifdef CY82C693_SETDMA_CLOCK
u8 data = 0;
#endif /* CY82C693_SETDMA_CLOCK */
@@ -411,20 +414,30 @@ void __init init_hwif_cy82c693(ide_hwif_t *hwif)
#endif /* CONFIG_BLK_DEV_IDEDMA */
}
-void __init init_dma_cy82c693 (ide_hwif_t *hwif, unsigned long dmabase)
+static __initdata ide_hwif_t *primary;
+
+void __init init_iops_cy82c693(ide_hwif_t *hwif)
{
- ide_setup_dma(hwif, dmabase, 8);
+ if (PCI_FUNC(hwif->pci_dev->devfn) == 1)
+ primary = hwif;
+ else {
+ hwif->mate = primary;
+ hwif->channel = 1;
+ }
}
-extern void ide_setup_pci_device(struct pci_dev *, ide_pci_device_t *);
-
static int __devinit cy82c693_init_one(struct pci_dev *dev, const struct pci_device_id *id)
{
ide_pci_device_t *d = &cy82c693_chipsets[id->driver_data];
- if ((!(PCI_FUNC(dev->devfn) & 1) ||
- (!((dev->class >> 8) == PCI_CLASS_STORAGE_IDE))))
- return 0; /* CY82C693 is more than only a IDE controller */
- ide_setup_pci_device(dev, d);
+ struct pci_dev *dev2;
+
+ /* CY82C693 is more than only an IDE controller.
+ Function 1 is the primary IDE channel, function 2 the secondary. */
+ if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE &&
+ PCI_FUNC(dev->devfn) == 1) {
+ dev2 = pci_find_slot(dev->bus->number, dev->devfn + 1);
+ ide_setup_pci_devices(dev, dev2, d);
+ }
return 0;
}
diff --git a/drivers/ide/pci/cy82c693.h b/drivers/ide/pci/cy82c693.h
index b5c6f9652d51..d7c8d19a8523 100644
--- a/drivers/ide/pci/cy82c693.h
+++ b/drivers/ide/pci/cy82c693.h
@@ -66,7 +66,7 @@ typedef struct pio_clocks_s {
extern unsigned int init_chipset_cy82c693(struct pci_dev *, const char *);
extern void init_hwif_cy82c693(ide_hwif_t *);
-extern void init_dma_cy82c693(ide_hwif_t *, unsigned long);
+extern void init_iops_cy82c693(ide_hwif_t *);
static ide_pci_device_t cy82c693_chipsets[] __initdata = {
{ /* 0 */
@@ -74,10 +74,10 @@ static ide_pci_device_t cy82c693_chipsets[] __initdata = {
device: PCI_DEVICE_ID_CONTAQ_82C693,
name: "CY82C693",
init_chipset: init_chipset_cy82c693,
- init_iops: NULL,
+ init_iops: init_iops_cy82c693,
init_hwif: init_hwif_cy82c693,
- init_dma: init_dma_cy82c693,
- channels: 2,
+ init_dma: NULL,
+ channels: 1,
autodma: AUTODMA,
enablebits: {{0x00,0x00,0x00}, {0x00,0x00,0x00}},
bootable: ON_BOARD,
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 29dfacd9b29d..8ef4fc169dec 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -250,6 +250,7 @@ second_chance_to_dma:
switch(dev->device) {
case PCI_DEVICE_ID_AL_M5219:
+ case PCI_DEVICE_ID_AL_M5229:
case PCI_DEVICE_ID_AMD_VIPER_7409:
case PCI_DEVICE_ID_CMD_643:
case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE:
diff --git a/drivers/pnp/pnpbios_proc.c b/drivers/pnp/pnpbios_proc.c
index 6d7c5e1b1321..56130105496e 100644
--- a/drivers/pnp/pnpbios_proc.c
+++ b/drivers/pnp/pnpbios_proc.c
@@ -68,6 +68,7 @@ static int proc_read_escdinfo(char *buf, char **start, off_t pos,
);
}
+#define MAX_SANE_ESCD_SIZE (32*1024)
static int proc_read_escd(char *buf, char **start, off_t pos,
int count, int *eof, void *data)
{
@@ -79,8 +80,8 @@ static int proc_read_escd(char *buf, char **start, off_t pos,
return -EIO;
/* sanity check */
- if (escd.escd_size > (32*1024)) {
- printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size is too great\n");
+ if (escd.escd_size > MAX_SANE_ESCD_SIZE) {
+ printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size reported by BIOS escd_info call is too great\n");
return -EFBIG;
}
@@ -90,7 +91,14 @@ static int proc_read_escd(char *buf, char **start, off_t pos,
if (pnp_bios_read_escd(tmpbuf, escd.nv_storage_base))
return -EIO;
- escd_size = (unsigned char)(buf[0]) + (unsigned char)(buf[1])*256;
+ escd_size = (unsigned char)(tmpbuf[0]) + (unsigned char)(tmpbuf[1])*256;
+
+ /* sanity check */
+ if (escd_size > MAX_SANE_ESCD_SIZE) {
+ printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size reported by BIOS read_escd call is too great\n");
+ return -EFBIG;
+ }
+
escd_left_to_read = escd_size - pos;
if (escd_left_to_read < 0) escd_left_to_read = 0;
if (escd_left_to_read == 0) *eof = 1;
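
The escd_size computation assembles the 16-bit little-endian length the BIOS wrote into the first two bytes of the buffer: for example, tmpbuf[0] = 0x34 and tmpbuf[1] = 0x12 decode to 0x34 + 0x12*256 = 0x1234 (4660) bytes. The old code read those bytes from buf, the /proc output buffer, instead of tmpbuf, the buffer pnp_bios_read_escd() actually filled, so the size came from the wrong memory entirely; the second MAX_SANE_ESCD_SIZE check then guards against a BIOS whose two length fields disagree.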
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index 3e71b678fb64..ea3a9df5f953 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -148,6 +148,11 @@
Fix bug in raw command post with data ioctl method.
Fix bug where rollcall sometimes failed with cable errors.
Print unit # on all command timeouts.
+ 1.02.00.026 - Fix possible infinite retry bug with power glitch induced
+ drive timeouts.
+ Cleanup some AEN severity levels.
+ 1.02.00.027 - Add drive not supported AEN code for SATA controllers.
+ Remove spurious unknown ioctl error message.
*/
#include <linux/module.h>
@@ -201,7 +206,7 @@ static struct notifier_block tw_notifier = {
};
/* Globals */
-char *tw_driver_version="1.02.00.025";
+char *tw_driver_version="1.02.00.027";
TW_Device_Extension *tw_device_extension_list[TW_MAX_SLOT];
int tw_device_extension_count = 0;
@@ -212,7 +217,7 @@ int tw_aen_complete(TW_Device_Extension *tw_dev, int request_id)
{
TW_Param *param;
unsigned short aen;
- int error = 0;
+ int error = 0, table_max = 0;
dprintk(KERN_WARNING "3w-xxxx: tw_aen_complete()\n");
if (tw_dev->alignment_virtual_address[request_id] == NULL) {
@@ -227,7 +232,8 @@ int tw_aen_complete(TW_Device_Extension *tw_dev, int request_id)
if (aen == 0x0ff) {
printk(KERN_WARNING "3w-xxxx: scsi%d: AEN: INFO: AEN queue overflow.\n", tw_dev->host->host_no);
} else {
- if ((aen & 0x0ff) < TW_AEN_STRING_MAX) {
+ table_max = sizeof(tw_aen_string)/sizeof(char *);
+ if ((aen & 0x0ff) < table_max) {
if ((tw_aen_string[aen & 0xff][strlen(tw_aen_string[aen & 0xff])-1]) == '#') {
printk(KERN_WARNING "3w-xxxx: scsi%d: AEN: %s%d.\n", tw_dev->host->host_no, tw_aen_string[aen & 0xff], aen >> 8);
} else {
@@ -289,7 +295,7 @@ int tw_aen_drain_queue(TW_Device_Extension *tw_dev)
int first_reset = 0;
int queue = 0;
int imax, i;
- int found = 0;
+ int found = 0, table_max = 0;
dprintk(KERN_NOTICE "3w-xxxx: tw_aen_drain_queue()\n");
@@ -409,7 +415,8 @@ int tw_aen_drain_queue(TW_Device_Extension *tw_dev)
if (aen == 0x0ff) {
printk(KERN_WARNING "3w-xxxx: AEN: INFO: AEN queue overflow.\n");
} else {
- if ((aen & 0x0ff) < TW_AEN_STRING_MAX) {
+ table_max = sizeof(tw_aen_string)/sizeof(char *);
+ if ((aen & 0x0ff) < table_max) {
if ((tw_aen_string[aen & 0xff][strlen(tw_aen_string[aen & 0xff])-1]) == '#') {
printk(KERN_WARNING "3w-xxxx: AEN: %s%d.\n", tw_aen_string[aen & 0xff], aen >> 8);
} else {
@@ -1442,7 +1449,8 @@ static void tw_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
/* If error, command failed */
if (error == 1) {
- tw_dev->srb[request_id]->result = (DID_RESET << 16);
+ /* Ask for a host reset */
+ tw_dev->srb[request_id]->result = (DID_OK << 16) | (CHECK_CONDITION << 1);
}
/* Now complete the io */
@@ -1784,7 +1792,7 @@ int tw_ioctl(TW_Device_Extension *tw_dev, int request_id)
return 1;
}
default:
- printk(KERN_WARNING "3w-xxxx: Unknown ioctl 0x%x.\n", opcode);
+ dprintk(KERN_WARNING "3w-xxxx: Unknown ioctl 0x%x.\n", opcode);
tw_dev->state[request_id] = TW_S_COMPLETED;
tw_state_request_finish(tw_dev, request_id);
tw_dev->srb[request_id]->result = (DID_OK << 16);
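
Deriving table_max from sizeof(tw_aen_string)/sizeof(char *) means the bound can never drift out of sync with the table when entries are added, as the header change below does with the new 0x030 entry; the hardcoded TW_AEN_STRING_MAX it replaces had exactly that failure mode. The idiom in isolation, as a minimal sketch:

	static char *table[] = { "a", "b", "c" };
	int table_max = sizeof(table) / sizeof(char *);	/* 3 entries */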
diff --git a/drivers/scsi/3w-xxxx.h b/drivers/scsi/3w-xxxx.h
index 3f96e3753da3..5a5d7f6a056f 100644
--- a/drivers/scsi/3w-xxxx.h
+++ b/drivers/scsi/3w-xxxx.h
@@ -90,14 +90,13 @@ static char *tw_aen_string[] = {
"INFO: Verify started: Unit #", // 0x029
"ERROR: Verify failed: Port #", // 0x02A
"INFO: Verify complete: Unit #", // 0x02B
- "ERROR: Overwrote bad sector during rebuild: Port #", //0x02C
- "ERROR: Encountered bad sector during rebuild: Port #", //0x02D
- "INFO: Replacement drive is too small: Port #", //0x02E
- "WARNING: Verify error: Unit not previously initialized: Unit #" //0x02F
+ "WARNING: Overwrote bad sector during rebuild: Port #", //0x02C
+ "ERROR: Encountered bad sector during rebuild: Port #", //0x02D
+ "ERROR: Replacement drive is too small: Port #", //0x02E
+ "WARNING: Verify error: Unit not previously initialized: Unit #", //0x02F
+ "ERROR: Drive not supported: Port #" // 0x030
};
-#define TW_AEN_STRING_MAX 0x030
-
/*
Sense key lookup table
Format: ESDC/flags,SenseKey,AdditionalSenseCode,AdditionalSenseCodeQualifier
diff --git a/fs/buffer.c b/fs/buffer.c
index 0b9766099e3d..4f1c380230be 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -128,22 +128,18 @@ void unlock_buffer(struct buffer_head *bh)
*/
void __wait_on_buffer(struct buffer_head * bh)
{
- wait_queue_head_t *wq = bh_waitq_head(bh);
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
+ wait_queue_head_t *wqh = bh_waitq_head(bh);
+ DEFINE_WAIT(wait);
get_bh(bh);
- add_wait_queue(wq, &wait);
do {
+ prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
blk_run_queues();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- if (!buffer_locked(bh))
- break;
- schedule();
+ if (buffer_locked(bh))
+ schedule();
} while (buffer_locked(bh));
- tsk->state = TASK_RUNNING;
- remove_wait_queue(wq, &wait);
put_bh(bh);
+ finish_wait(wqh, &wait);
}
static inline void
@@ -246,10 +242,12 @@ int fsync_bdev(struct block_device *bdev)
}
/*
- * sync everything.
+ * sync everything. Start out by waking pdflush, because that writes back
+ * all queues in parallel.
*/
asmlinkage long sys_sync(void)
{
+ wakeup_bdflush(0);
sync_inodes(0); /* All mappings and inodes, including block devices */
DQUOT_SYNC(NULL);
sync_supers(); /* Write the superblocks */
diff --git a/fs/dcache.c b/fs/dcache.c
index ac127d32eed9..1715f006ccd4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -329,12 +329,11 @@ static inline void prune_one_dentry(struct dentry * dentry)
void prune_dcache(int count)
{
spin_lock(&dcache_lock);
- for (;;) {
+ for (; count ; count--) {
struct dentry *dentry;
struct list_head *tmp;
tmp = dentry_unused.prev;
-
if (tmp == &dentry_unused)
break;
list_del_init(tmp);
@@ -349,12 +348,8 @@ void prune_dcache(int count)
dentry_stat.nr_unused--;
/* Unused dentry with a count? */
- if (atomic_read(&dentry->d_count))
- BUG();
-
+ BUG_ON(atomic_read(&dentry->d_count));
prune_one_dentry(dentry);
- if (!--count)
- break;
}
spin_unlock(&dcache_lock);
}
@@ -573,19 +568,11 @@ void shrink_dcache_anon(struct list_head *head)
/*
* This is called from kswapd when we think we need some
- * more memory, but aren't really sure how much. So we
- * carefully try to free a _bit_ of our dcache, but not
- * too much.
- *
- * Priority:
- * 1 - very urgent: shrink everything
- * ...
- * 6 - base-level: try to shrink a bit.
+ * more memory.
*/
-int shrink_dcache_memory(int priority, unsigned int gfp_mask)
+int shrink_dcache_memory(int ratio, unsigned int gfp_mask)
{
- int count = 0;
-
+ int entries = dentry_stat.nr_dentry / ratio + 1;
/*
* Nasty deadlock avoidance.
*
@@ -600,11 +587,8 @@ int shrink_dcache_memory(int priority, unsigned int gfp_mask)
if (!(gfp_mask & __GFP_FS))
return 0;
- count = dentry_stat.nr_unused / priority;
-
- prune_dcache(count);
- kmem_cache_shrink(dentry_cache);
- return 0;
+ prune_dcache(entries);
+ return entries;
}
#define NAME_ALLOC_LEN(len) ((len+16) & ~15)
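
The shrink_dcache_memory() interface changes meaning here: the old 1..6 priority, which divided the unused count, becomes a ratio; the function prunes nr_dentry / ratio + 1 entries and returns that number so the caller in vmscan can see how much work was attempted. fs/dquot.c and fs/inode.c below get the same conversion, and the per-call kmem_cache_shrink() is dropped. A sketch of the common shape every cache shrinker now follows, with hypothetical foo names standing in for the per-cache pieces:

int shrink_foo_memory(int ratio, unsigned int gfp_mask)
{
	int entries = foo_stat.nr_total / ratio + 1;

	if (!(gfp_mask & __GFP_FS))	/* deadlock avoidance, as in the dcache */
		return 0;
	prune_foo(entries);
	return entries;			/* tell vmscan how much was attempted */
}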
diff --git a/fs/dquot.c b/fs/dquot.c
index 58095d92cbee..3b1efaef018a 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -480,26 +480,17 @@ static void prune_dqcache(int count)
/*
* This is called from kswapd when we think we need some
- * more memory, but aren't really sure how much. So we
- * carefully try to free a _bit_ of our dqcache, but not
- * too much.
- *
- * Priority:
- * 1 - very urgent: shrink everything
- * ...
- * 6 - base-level: try to shrink a bit.
+ * more memory
*/
-int shrink_dqcache_memory(int priority, unsigned int gfp_mask)
+int shrink_dqcache_memory(int ratio, unsigned int gfp_mask)
{
- int count = 0;
+ int entries = dqstats.allocated_dquots / ratio + 1;
lock_kernel();
- count = dqstats.free_dquots / priority;
- prune_dqcache(count);
+ prune_dqcache(entries);
unlock_kernel();
- kmem_cache_shrink(dquot_cachep);
- return 0;
+ return entries;
}
/*
diff --git a/fs/inode.c b/fs/inode.c
index 89c96e221043..c07e1e7e1a35 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -386,10 +386,11 @@ void prune_icache(int goal)
count = 0;
entry = inode_unused.prev;
- while (entry != &inode_unused)
- {
+ for (; goal; goal--) {
struct list_head *tmp = entry;
+ if (entry == &inode_unused)
+ break;
entry = entry->prev;
inode = INODE(tmp);
if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
@@ -403,8 +404,6 @@ void prune_icache(int goal)
list_add(tmp, freeable);
inode->i_state |= I_FREEING;
count++;
- if (!--goal)
- break;
}
inodes_stat.nr_unused -= count;
spin_unlock(&inode_lock);
@@ -414,19 +413,11 @@ void prune_icache(int goal)
/*
* This is called from kswapd when we think we need some
- * more memory, but aren't really sure how much. So we
- * carefully try to free a _bit_ of our icache, but not
- * too much.
- *
- * Priority:
- * 1 - very urgent: shrink everything
- * ...
- * 6 - base-level: try to shrink a bit.
+ * more memory.
*/
-int shrink_icache_memory(int priority, int gfp_mask)
+int shrink_icache_memory(int ratio, unsigned int gfp_mask)
{
- int count = 0;
-
+ int entries = inodes_stat.nr_inodes / ratio + 1;
/*
* Nasty deadlock avoidance..
*
@@ -437,12 +428,10 @@ int shrink_icache_memory(int priority, int gfp_mask)
if (!(gfp_mask & __GFP_FS))
return 0;
- count = inodes_stat.nr_unused / priority;
-
- prune_icache(count);
- kmem_cache_shrink(inode_cachep);
- return 0;
+ prune_icache(entries);
+ return entries;
}
+EXPORT_SYMBOL(shrink_icache_memory);
/*
* Called with the inode lock held.
diff --git a/fs/locks.c b/fs/locks.c
index 3702820a3de1..ab969a790fca 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -252,7 +252,7 @@ static int flock_make_lock(struct file *filp,
return -ENOMEM;
fl->fl_file = filp;
- fl->fl_pid = current->pid;
+ fl->fl_pid = current->tgid;
fl->fl_flags = (cmd & LOCK_NB) ? FL_FLOCK : FL_FLOCK | FL_SLEEP;
fl->fl_type = type;
fl->fl_end = OFFSET_MAX;
@@ -308,7 +308,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
fl->fl_end = OFFSET_MAX;
fl->fl_owner = current->files;
- fl->fl_pid = current->pid;
+ fl->fl_pid = current->tgid;
fl->fl_file = filp;
fl->fl_flags = FL_POSIX;
fl->fl_notify = NULL;
@@ -348,7 +348,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
fl->fl_end = OFFSET_MAX;
fl->fl_owner = current->files;
- fl->fl_pid = current->pid;
+ fl->fl_pid = current->tgid;
fl->fl_file = filp;
fl->fl_flags = FL_POSIX;
fl->fl_notify = NULL;
@@ -377,7 +377,7 @@ static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
return -ENOMEM;
fl->fl_owner = current->files;
- fl->fl_pid = current->pid;
+ fl->fl_pid = current->tgid;
fl->fl_file = filp;
fl->fl_flags = FL_LEASE;
@@ -669,7 +669,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
int error;
fl.fl_owner = current->files;
- fl.fl_pid = current->pid;
+ fl.fl_pid = current->tgid;
fl.fl_file = filp;
fl.fl_flags = FL_POSIX | FL_ACCESS | FL_SLEEP;
fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
@@ -1241,7 +1241,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
*before = fl;
list_add(&fl->fl_link, &file_lock_list);
- error = f_setown(filp, current->pid, 1);
+ error = f_setown(filp, current->tgid, 1);
out_unlock:
unlock_kernel();
return error;
@@ -1632,7 +1632,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
lock.fl_owner = owner;
- lock.fl_pid = current->pid;
+ lock.fl_pid = current->tgid;
lock.fl_file = filp;
if (filp->f_op && filp->f_op->lock != NULL) {
diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h
index b2afa09c062f..41bc8ef0bdf8 100644
--- a/include/asm-i386/io.h
+++ b/include/asm-i386/io.h
@@ -40,7 +40,6 @@
#define XQUAD_PORTIO_BASE 0xfe400000
#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
-#define XQUAD_PORTIO_LEN 0x80000 /* Only remapping first 2 quads */
#ifdef __KERNEL__
diff --git a/include/asm-i386/semaphore.h b/include/asm-i386/semaphore.h
index a0ce1b8dba69..9c456727e8a3 100644
--- a/include/asm-i386/semaphore.h
+++ b/include/asm-i386/semaphore.h
@@ -116,7 +116,7 @@ static inline void down(struct semaphore * sem)
#if WAITQUEUE_DEBUG
CHECK_MAGIC(sem->__magic);
#endif
-
+ might_sleep();
__asm__ __volatile__(
"# atomic down operation\n\t"
LOCK "decl %0\n\t" /* --sem->count */
@@ -142,7 +142,7 @@ static inline int down_interruptible(struct semaphore * sem)
#if WAITQUEUE_DEBUG
CHECK_MAGIC(sem->__magic);
#endif
-
+ might_sleep();
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
LOCK "decl %1\n\t" /* --sem->count */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f99a03f17e60..a64a657545fe 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -186,7 +186,7 @@ extern int shrink_dcache_memory(int, unsigned int);
extern void prune_dcache(int);
/* icache memory management (defined in linux/fs/inode.c) */
-extern int shrink_icache_memory(int, int);
+extern int shrink_icache_memory(int, unsigned int);
extern void prune_icache(int);
/* quota cache memory management (defined in linux/fs/dquot.c) */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index e98168f92e67..c5cc69788530 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -52,12 +52,10 @@ extern inline struct list_head *elv_get_sort_head(request_queue_t *, struct requ
extern elevator_t elevator_noop;
/*
- * elevator linus. based on linus ideas of starvation control, using
- * sequencing to manage inserts and merges.
+ * deadline i/o scheduler. uses request timeouts to prevent indefinite
+ * starvation
*/
-extern elevator_t elevator_linus;
-#define elv_linus_sequence(rq) ((long)(rq)->elevator_private)
-#define ELV_LINUS_SEEK_COST 16
+extern elevator_t iosched_deadline;
/*
* use the /proc/iosched interface, all the below is history ->
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5efa540d55f8..44c38b134498 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -40,6 +40,13 @@
struct completion;
+#ifdef CONFIG_DEBUG_KERNEL
+void __might_sleep(char *file, int line);
+#define might_sleep() __might_sleep(__FILE__, __LINE__)
+#else
+#define might_sleep() do {} while(0)
+#endif
+
extern struct notifier_block *panic_notifier_list;
NORET_TYPE void panic(const char * fmt, ...)
__attribute__ ((NORET_AND format (printf, 1, 2)));
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c63e4947387f..482db998aca7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -524,6 +524,7 @@ extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned lon
extern struct page * vmalloc_to_page(void *addr);
extern unsigned long get_page_cache_size(void);
+extern unsigned int nr_used_zone_pages(void);
#endif /* __KERNEL__ */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 43390b2e2ef4..bfc986131fe6 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -74,9 +74,15 @@ static inline void ___add_to_page_cache(struct page *page,
inc_page_state(nr_pagecache);
}
-extern void FASTCALL(lock_page(struct page *page));
+extern void FASTCALL(__lock_page(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
+static inline void lock_page(struct page *page)
+{
+ if (TestSetPageLocked(page))
+ __lock_page(page);
+}
+
/*
* This is exported only for wait_on_page_locked/wait_on_page_writeback.
* Never use this directly!
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 4a7e2bb0d7c4..bfb988885002 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -40,6 +40,7 @@ extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str));
*/
static inline void down_read(struct rw_semaphore *sem)
{
+ might_sleep();
rwsemtrace(sem,"Entering down_read");
__down_read(sem);
rwsemtrace(sem,"Leaving down_read");
@@ -62,6 +63,7 @@ static inline int down_read_trylock(struct rw_semaphore *sem)
*/
static inline void down_write(struct rw_semaphore *sem)
{
+ might_sleep();
rwsemtrace(sem,"Entering down_write");
__down_write(sem);
rwsemtrace(sem,"Leaving down_write");
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f1346010d73e..471dcb9c108d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -100,8 +100,9 @@ extern unsigned long nr_uninterruptible(void);
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
-#define TASK_ZOMBIE 4
-#define TASK_STOPPED 8
+#define TASK_STOPPED 4
+#define TASK_ZOMBIE 8
+#define TASK_DEAD 16
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 8664b02f230d..b6ce459f8792 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -119,6 +119,32 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
_raced; \
})
+/*
+ * Waitqueues which are removed from the waitqueue_head at wakeup time
+ */
+void FASTCALL(prepare_to_wait(wait_queue_head_t *q,
+ wait_queue_t *wait, int state));
+void FASTCALL(prepare_to_wait_exclusive(wait_queue_head_t *q,
+ wait_queue_t *wait, int state));
+void FASTCALL(finish_wait(wait_queue_head_t *q, wait_queue_t *wait));
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync);
+
+#define DEFINE_WAIT(name) \
+ wait_queue_t name = { \
+ .task = current, \
+ .func = autoremove_wake_function, \
+ .task_list = { .next = &name.task_list, \
+ .prev = &name.task_list, \
+ }, \
+ }
+
+#define init_wait(wait) \
+ do { \
+ wait->task = current; \
+ wait->func = autoremove_wake_function; \
+ INIT_LIST_HEAD(&wait->task_list); \
+ } while (0)
+
#endif /* __KERNEL__ */
#endif
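
These primitives pair with DEFINE_WAIT() to replace the open-coded add_wait_queue()/set_current_state() sequences removed throughout this patch. The canonical loop looks like this (wqh and condition are placeholders for whatever the caller waits on):

	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&wqh, &wait, TASK_UNINTERRUPTIBLE);
		if (condition)
			break;
		schedule();
	}
	finish_wait(&wqh, &wait);

Because wakeups go through autoremove_wake_function(), a woken task has already been removed from the queue; prepare_to_wait() re-adds the entry only when it is off the list, and finish_wait() both restores TASK_RUNNING and dequeues the entry if no wakeup ever arrived. Testing the condition only after prepare_to_wait() has set the task state is what closes the lost-wakeup window.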
diff --git a/kernel/exit.c b/kernel/exit.c
index 7189e9bce6d4..6ed07def4c62 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -32,6 +32,7 @@ int getrusage(struct task_struct *, int, struct rusage *);
static struct dentry * __unhash_process(struct task_struct *p)
{
struct dentry *proc_dentry;
+
nr_threads--;
detach_pid(p, PIDTYPE_PID);
detach_pid(p, PIDTYPE_TGID);
@@ -57,31 +58,31 @@ static struct dentry * __unhash_process(struct task_struct *p)
void release_task(struct task_struct * p)
{
struct dentry *proc_dentry;
+ task_t *leader;
- if (p->state != TASK_ZOMBIE)
+ if (p->state < TASK_ZOMBIE)
BUG();
if (p != current)
wait_task_inactive(p);
atomic_dec(&p->user->processes);
security_ops->task_free_security(p);
free_uid(p->user);
- if (unlikely(p->ptrace)) {
- write_lock_irq(&tasklist_lock);
+ write_lock_irq(&tasklist_lock);
+ if (unlikely(p->ptrace))
__ptrace_unlink(p);
- write_unlock_irq(&tasklist_lock);
- }
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
- write_lock_irq(&tasklist_lock);
__exit_sighand(p);
proc_dentry = __unhash_process(p);
/*
* If we are the last non-leader member of the thread
* group, and the leader is zombie, then notify the
- * group leader's parent process.
+ * group leader's parent process. (if it wants notification.)
*/
- if (p->group_leader != p && thread_group_empty(p))
- do_notify_parent(p->group_leader, p->group_leader->exit_signal);
+ leader = p->group_leader;
+ if (leader != p && thread_group_empty(leader) &&
+ leader->state == TASK_ZOMBIE && leader->exit_signal != -1)
+ do_notify_parent(leader, leader->exit_signal);
p->parent->cutime += p->utime + p->cutime;
p->parent->cstime += p->stime + p->cstime;
@@ -159,7 +160,7 @@ static int __will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
if (p == ignored_task
- || p->state == TASK_ZOMBIE
+ || p->state >= TASK_ZOMBIE
|| p->real_parent->pid == 1)
continue;
if (p->real_parent->pgrp != pgrp
@@ -435,8 +436,11 @@ void exit_mm(struct task_struct *tsk)
static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
{
- /* Make sure we're not reparenting to ourselves. */
- if (p == reaper)
+ /*
+ * Make sure we're not reparenting to ourselves and that
+ * the parent is not a zombie.
+ */
+ if (p == reaper || reaper->state >= TASK_ZOMBIE)
p->real_parent = child_reaper;
else
p->real_parent = reaper;
@@ -774,9 +778,10 @@ static int eligible_child(pid_t pid, int options, task_t *p)
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
- int flag, retval;
DECLARE_WAITQUEUE(wait, current);
struct task_struct *tsk;
+ unsigned long state;
+ int flag, retval;
if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
return -EINVAL;
@@ -827,7 +832,15 @@ repeat:
*/
if (ret == 2)
continue;
+ /*
+ * Try to move the task's state to DEAD
+ * only one thread is allowed to do this:
+ */
+ state = xchg(&p->state, TASK_DEAD);
+ if (state != TASK_ZOMBIE)
+ continue;
read_unlock(&tasklist_lock);
+
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
if (!retval && stat_addr) {
if (p->sig->group_exit)
@@ -835,13 +848,16 @@ repeat:
else
retval = put_user(p->exit_code, stat_addr);
}
- if (retval)
- goto end_wait4;
+ if (retval) {
+ p->state = TASK_ZOMBIE;
+ goto end_wait4;
+ }
retval = p->pid;
if (p->real_parent != p->parent) {
write_lock_irq(&tasklist_lock);
__ptrace_unlink(p);
do_notify_parent(p, SIGCHLD);
+ p->state = TASK_ZOMBIE;
write_unlock_irq(&tasklist_lock);
} else
release_task(p);
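
The xchg() above is what lets several threads race into sys_wait4() safely: each candidate reaper atomically swaps TASK_DEAD into p->state, and only the caller that got TASK_ZOMBIE back owns the task from then on; everyone else just continues the scan. Note the two rollback points: if copying the exit status out fails, or if a ptraced child is handed back to its real parent instead of being released, p->state is put back to TASK_ZOMBIE so the task can still be reaped later.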
diff --git a/kernel/fork.c b/kernel/fork.c
index 062a4d1f9c3e..5880309f3fee 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -103,6 +103,52 @@ void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
spin_unlock_irqrestore(&q->lock, flags);
}
+void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+ unsigned long flags;
+
+ __set_current_state(state);
+ wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&q->lock, flags);
+ if (list_empty(&wait->task_list))
+ __add_wait_queue(q, wait);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+
+void
+prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+ unsigned long flags;
+
+ __set_current_state(state);
+ wait->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&q->lock, flags);
+ if (list_empty(&wait->task_list))
+ __add_wait_queue_tail(q, wait);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+
+void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+{
+ unsigned long flags;
+
+ __set_current_state(TASK_RUNNING);
+ if (!list_empty(&wait->task_list)) {
+ spin_lock_irqsave(&q->lock, flags);
+ list_del_init(&wait->task_list);
+ spin_unlock_irqrestore(&q->lock, flags);
+ }
+}
+
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync)
+{
+ int ret = default_wake_function(wait, mode, sync);
+
+ if (ret)
+ list_del_init(&wait->task_list);
+ return ret;
+}
+
void __init fork_init(unsigned long mempages)
{
/* create a slab on which task_structs can be allocated */
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index 557ae8f7ded2..0409fc676f29 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -400,6 +400,10 @@ EXPORT_SYMBOL(irq_stat);
EXPORT_SYMBOL(add_wait_queue);
EXPORT_SYMBOL(add_wait_queue_exclusive);
EXPORT_SYMBOL(remove_wait_queue);
+EXPORT_SYMBOL(prepare_to_wait);
+EXPORT_SYMBOL(prepare_to_wait_exclusive);
+EXPORT_SYMBOL(finish_wait);
+EXPORT_SYMBOL(autoremove_wake_function);
/* completion handling */
EXPORT_SYMBOL(wait_for_completion);
@@ -493,7 +497,9 @@ EXPORT_SYMBOL(jiffies_64);
EXPORT_SYMBOL(xtime);
EXPORT_SYMBOL(do_gettimeofday);
EXPORT_SYMBOL(do_settimeofday);
-
+#ifdef CONFIG_DEBUG_KERNEL
+EXPORT_SYMBOL(__might_sleep);
+#endif
#if !defined(__ia64__)
EXPORT_SYMBOL(loops_per_jiffy);
#endif
diff --git a/kernel/pid.c b/kernel/pid.c
index b4da62f0aef2..0005a8cc36cb 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -53,6 +53,8 @@ static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES;
+static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+
inline void free_pidmap(int pid)
{
pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
@@ -77,8 +79,13 @@ static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
* Free the page if someone raced with us
* installing it:
*/
- if (cmpxchg(&map->page, NULL, (void *) page))
+ spin_lock(&pidmap_lock);
+ if (map->page)
free_page(page);
+ else
+ map->page = (void *)page;
+ spin_unlock(&pidmap_lock);
+
if (!map->page)
break;
}
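
Presumably the cmpxchg() is dropped here because it is not available on every architecture this generic code has to build on (the original 386, for one); the spinlock-protected compare-and-set is the portable equivalent, and the cost is negligible since this path only runs when a fresh pidmap page is installed. The new install sequence in one piece:

	spin_lock(&pidmap_lock);
	if (map->page)
		free_page(page);	/* someone raced with us: discard ours */
	else
		map->page = (void *)page;
	spin_unlock(&pidmap_lock);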
diff --git a/kernel/sched.c b/kernel/sched.c
index 304f90fd4bdf..9965e5f7549e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2150,3 +2150,20 @@ void __init sched_init(void)
enter_lazy_tlb(&init_mm, current, smp_processor_id());
}
+#ifdef CONFIG_DEBUG_KERNEL
+void __might_sleep(char *file, int line)
+{
+#if defined(in_atomic)
+ static unsigned long prev_jiffy; /* ratelimiting */
+
+ if (in_atomic()) {
+ if (time_before(jiffies, prev_jiffy + HZ))
+ return;
+ prev_jiffy = jiffies;
+ printk("Sleeping function called from illegal"
+ " context at %s:%d\n", file, line);
+ dump_stack();
+ }
+#endif
+}
+#endif
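
Taken together with the hooks added elsewhere in this patch, might_sleep() now guards the common sleeping entry points: down() and down_interruptible() in asm-i386/semaphore.h, down_read() and down_write() in rwsem.h, __alloc_pages() when __GFP_WAIT is set, and __kmem_cache_alloc() likewise. On CONFIG_DEBUG_KERNEL builds, calling any of these from atomic context prints the offending file:line and a stack dump, rate-limited to once per second via prev_jiffy.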
diff --git a/kernel/timer.c b/kernel/timer.c
index 3b4be840f931..55c14c11c901 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -888,20 +888,6 @@ asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
return -EINVAL;
-
- if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
- current->policy != SCHED_NORMAL)
- {
- /*
- * Short delay requests up to 2 ms will be handled with
- * high precision by a busy wait for all real-time processes.
- *
- * Its important on SMP not to do this holding locks.
- */
- udelay((t.tv_nsec + 999) / 1000);
- return 0;
- }
-
expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
current->state = TASK_INTERRUPTIBLE;
diff --git a/mm/filemap.c b/mm/filemap.c
index 9118a5794f27..f45168a04974 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -632,19 +632,15 @@ static inline wait_queue_head_t *page_waitqueue(struct page *page)
void wait_on_page_bit(struct page *page, int bit_nr)
{
wait_queue_head_t *waitqueue = page_waitqueue(page);
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
+ DEFINE_WAIT(wait);
- add_wait_queue(waitqueue, &wait);
do {
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- if (!test_bit(bit_nr, &page->flags))
- break;
+ prepare_to_wait(waitqueue, &wait, TASK_UNINTERRUPTIBLE);
sync_page(page);
- schedule();
+ if (test_bit(bit_nr, &page->flags))
+ schedule();
} while (test_bit(bit_nr, &page->flags));
- __set_task_state(tsk, TASK_RUNNING);
- remove_wait_queue(waitqueue, &wait);
+ finish_wait(waitqueue, &wait);
}
EXPORT_SYMBOL(wait_on_page_bit);
@@ -690,38 +686,27 @@ void end_page_writeback(struct page *page)
EXPORT_SYMBOL(end_page_writeback);
/*
- * Get a lock on the page, assuming we need to sleep
- * to get it..
+ * Get a lock on the page, assuming we need to sleep to get it.
+ *
+ * Ugly: running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
+ * random driver's requestfn sets TASK_RUNNING, we could busywait. However
+ * chances are that on the second loop, the block layer's plug list is empty,
+ * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
*/
-static void __lock_page(struct page *page)
+void __lock_page(struct page *page)
{
- wait_queue_head_t *waitqueue = page_waitqueue(page);
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
+ wait_queue_head_t *wqh = page_waitqueue(page);
+ DEFINE_WAIT(wait);
- add_wait_queue_exclusive(waitqueue, &wait);
- for (;;) {
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- if (PageLocked(page)) {
- sync_page(page);
+ while (TestSetPageLocked(page)) {
+ prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+ sync_page(page);
+ if (PageLocked(page))
schedule();
- }
- if (!TestSetPageLocked(page))
- break;
}
- __set_task_state(tsk, TASK_RUNNING);
- remove_wait_queue(waitqueue, &wait);
-}
-
-/*
- * Get an exclusive lock on the page, optimistically
- * assuming it's not locked..
- */
-void lock_page(struct page *page)
-{
- if (TestSetPageLocked(page))
- __lock_page(page);
+ finish_wait(wqh, &wait);
}
+EXPORT_SYMBOL(__lock_page);
/*
* a rather lightweight function, finding and getting a reference to a
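
lock_page() itself moves to include/linux/pagemap.h (see above) as an inline that takes the lock with a single TestSetPageLocked() in the uncontended case, and only calls into the newly exported __lock_page() slow path when the page is already locked. The slow path follows the same prepare_to_wait()/finish_wait() structure used throughout this patch, retrying TestSetPageLocked() each time around the loop.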
diff --git a/mm/mprotect.c b/mm/mprotect.c
index fc1e3345d38b..be0096238437 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -187,7 +187,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
* Try to merge with the previous vma.
*/
if (mprotect_attempt_merge(vma, *pprev, end, newflags))
- return 0;
+ goto success;
} else {
error = split_vma(mm, vma, start, 1);
if (error)
@@ -209,7 +209,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
vma->vm_flags = newflags;
vma->vm_page_prot = newprot;
spin_unlock(&mm->page_table_lock);
-
+success:
change_protection(vma, start, end, newprot);
return 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 435a12dd1574..ab3284a3b78a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -321,6 +321,9 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
struct page * page;
int freed, i;
+ if (gfp_mask & __GFP_WAIT)
+ might_sleep();
+
KERNEL_STAT_ADD(pgalloc, 1<<order);
zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
@@ -479,6 +482,17 @@ unsigned int nr_free_pages(void)
return sum;
}
+unsigned int nr_used_zone_pages(void)
+{
+ unsigned int pages = 0;
+ struct zone *zone;
+
+ for_each_zone(zone)
+ pages += zone->nr_active + zone->nr_inactive;
+
+ return pages;
+}
+
static unsigned int nr_free_zone_pages(int offset)
{
pg_data_t *pgdat;
diff --git a/mm/pdflush.c b/mm/pdflush.c
index d5b5841ef0d9..7c31ae0446b4 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -79,9 +79,9 @@ static unsigned long last_empty_jifs;
*/
struct pdflush_work {
struct task_struct *who; /* The thread */
- void (*fn)(unsigned long); /* A callback function for pdflush to work on */
- unsigned long arg0; /* An argument to the callback function */
- struct list_head list; /* On pdflush_list, when the thread is idle */
+ void (*fn)(unsigned long); /* A callback function */
+ unsigned long arg0; /* An argument to the callback */
+ struct list_head list; /* On pdflush_list, when idle */
unsigned long when_i_went_to_sleep;
};
@@ -99,24 +99,35 @@ static int __pdflush(struct pdflush_work *my_work)
current->flags |= PF_FLUSHER;
my_work->fn = NULL;
my_work->who = current;
+ INIT_LIST_HEAD(&my_work->list);
spin_lock_irq(&pdflush_lock);
nr_pdflush_threads++;
-// printk("pdflush %d [%d] starts\n", nr_pdflush_threads, current->pid);
for ( ; ; ) {
struct pdflush_work *pdf;
- list_add(&my_work->list, &pdflush_list);
- my_work->when_i_went_to_sleep = jiffies;
set_current_state(TASK_INTERRUPTIBLE);
+ list_move(&my_work->list, &pdflush_list);
+ my_work->when_i_went_to_sleep = jiffies;
spin_unlock_irq(&pdflush_lock);
if (current->flags & PF_FREEZE)
refrigerator(PF_IOTHREAD);
schedule();
- if (my_work->fn)
- (*my_work->fn)(my_work->arg0);
+ spin_lock_irq(&pdflush_lock);
+ if (!list_empty(&my_work->list)) {
+ printk("pdflush: bogus wakeup!\n");
+ my_work->fn = NULL;
+ continue;
+ }
+ if (my_work->fn == NULL) {
+ printk("pdflush: NULL work function\n");
+ continue;
+ }
+ spin_unlock_irq(&pdflush_lock);
+
+ (*my_work->fn)(my_work->arg0);
/*
* Thread creation: For how long have there been zero
@@ -132,6 +143,7 @@ static int __pdflush(struct pdflush_work *my_work)
}
spin_lock_irq(&pdflush_lock);
+ my_work->fn = NULL;
/*
* Thread destruction: For how long has the sleepiest
@@ -143,13 +155,12 @@ static int __pdflush(struct pdflush_work *my_work)
continue;
pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
- pdf->when_i_went_to_sleep = jiffies; /* Limit exit rate */
+ /* Limit exit rate */
+ pdf->when_i_went_to_sleep = jiffies;
break; /* exeunt */
}
- my_work->fn = NULL;
}
nr_pdflush_threads--;
-// printk("pdflush %d [%d] ends\n", nr_pdflush_threads, current->pid);
spin_unlock_irq(&pdflush_lock);
return 0;
}
@@ -191,11 +202,10 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
list_del_init(&pdf->list);
if (list_empty(&pdflush_list))
last_empty_jifs = jiffies;
- spin_unlock_irqrestore(&pdflush_lock, flags);
pdf->fn = fn;
pdf->arg0 = arg0;
- wmb(); /* ? */
wake_up_process(pdf->who);
+ spin_unlock_irqrestore(&pdflush_lock, flags);
}
return ret;
}
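
The pdflush changes make a thread's presence on pdflush_list the single
"idle" flag (hence the INIT_LIST_HEAD() plus list_move() instead of
list_add()), detect bogus wakeups instead of silently re-running a stale
callback, and have pdflush_operation() fill in the work and wake the thread
while still holding pdflush_lock rather than dropping the lock and relying
on a wmb(). A condensed sketch of the dispatch side under these rules,
reusing the declarations from the hunks above:

static int pdflush_dispatch_sketch(void (*fn)(unsigned long),
				   unsigned long arg0)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&pdflush_lock, flags);
	if (list_empty(&pdflush_list)) {
		ret = -1;			/* no idle worker */
	} else {
		struct pdflush_work *pdf;

		pdf = list_entry(pdflush_list.next,
				 struct pdflush_work, list);
		list_del_init(&pdf->list);	/* off-list means "has work" */
		pdf->fn = fn;
		pdf->arg0 = arg0;
		/*
		 * Wake while still holding the lock: the worker takes
		 * pdflush_lock before reading fn/arg0, so it can never
		 * observe a half-initialised request.
		 */
		wake_up_process(pdf->who);
	}
	spin_unlock_irqrestore(&pdflush_lock, flags);
	return ret;
}
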
diff --git a/mm/slab.c b/mm/slab.c
index 549cd2f465ea..a6bd0a98734b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1370,6 +1370,9 @@ static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
unsigned long save_flags;
void* objp;
+ if (flags & __GFP_WAIT)
+ might_sleep();
+
kmem_cache_alloc_head(cachep, flags);
try_again:
local_irq_save(save_flags);
@@ -1496,7 +1499,11 @@ static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
if (unlikely(!--slabp->inuse)) {
/* Was partial or full, now empty. */
list_del(&slabp->list);
- list_add(&slabp->list, &cachep->slabs_free);
+/* list_add(&slabp->list, &cachep->slabs_free); */
+ if (unlikely(list_empty(&cachep->slabs_partial)))
+ list_add(&slabp->list, &cachep->slabs_partial);
+ else
+ kmem_slab_destroy(cachep, slabp);
} else if (unlikely(inuse == cachep->num)) {
/* Was full. */
list_del(&slabp->list);
@@ -1970,7 +1977,7 @@ static int s_show(struct seq_file *m, void *p)
}
list_for_each(q,&cachep->slabs_partial) {
slabp = list_entry(q, slab_t, list);
- if (slabp->inuse == cachep->num || !slabp->inuse)
+ if (slabp->inuse == cachep->num)
BUG();
active_objs += slabp->inuse;
active_slabs++;
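
On the freeing side, slab.c now destroys a slab the moment it goes empty
unless the cache's partial list is empty, in which case the slab is parked
there as a one-slab reserve. That is why s_show() no longer treats a
zero-inuse slab on slabs_partial as a BUG, and it dovetails with the vmscan
hunk below dropping the kmem_cache_reap() call from shrink_zone(). The
policy in isolation, as a sketch with an assumed helper name:

/* Called when slabp->inuse has just dropped to zero. */
static void retire_empty_slab(kmem_cache_t *cachep, slab_t *slabp)
{
	list_del(&slabp->list);
	if (list_empty(&cachep->slabs_partial))
		/* Keep one empty slab to absorb the next allocation. */
		list_add(&slabp->list, &cachep->slabs_partial);
	else
		/* Hand the pages straight back to the page allocator. */
		kmem_slab_destroy(cachep, slabp);
}
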
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5eade9423f0d..4302f698a7a4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -70,6 +70,10 @@
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
+#ifndef CONFIG_QUOTA
+#define shrink_dqcache_memory(ratio, gfp_mask) do { } while (0)
+#endif
+
/* Must be called with page's pte_chain_lock held. */
static inline int page_mapping_inuse(struct page * page)
{
@@ -97,7 +101,7 @@ static inline int is_page_cache_freeable(struct page *page)
static /* inline */ int
shrink_list(struct list_head *page_list, int nr_pages,
- unsigned int gfp_mask, int *max_scan)
+ unsigned int gfp_mask, int *max_scan, int *nr_mapped)
{
struct address_space *mapping;
LIST_HEAD(ret_pages);
@@ -116,6 +120,10 @@ shrink_list(struct list_head *page_list, int nr_pages,
if (TestSetPageLocked(page))
goto keep;
+ /* Double the slab pressure for mapped and swapcache pages */
+ if (page_mapped(page) || PageSwapCache(page))
+ (*nr_mapped)++;
+
BUG_ON(PageActive(page));
may_enter_fs = (gfp_mask & __GFP_FS) ||
(PageSwapCache(page) && (gfp_mask & __GFP_IO));
@@ -320,7 +328,7 @@ keep:
*/
static /* inline */ int
shrink_cache(int nr_pages, struct zone *zone,
- unsigned int gfp_mask, int max_scan)
+ unsigned int gfp_mask, int max_scan, int *nr_mapped)
{
LIST_HEAD(page_list);
struct pagevec pvec;
@@ -371,7 +379,8 @@ shrink_cache(int nr_pages, struct zone *zone,
max_scan -= nr_scan;
KERNEL_STAT_ADD(pgscan, nr_scan);
- nr_pages = shrink_list(&page_list,nr_pages,gfp_mask,&max_scan);
+ nr_pages = shrink_list(&page_list, nr_pages,
+ gfp_mask, &max_scan, nr_mapped);
if (nr_pages <= 0 && list_empty(&page_list))
goto done;
@@ -522,14 +531,10 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
static /* inline */ int
shrink_zone(struct zone *zone, int max_scan,
- unsigned int gfp_mask, int nr_pages)
+ unsigned int gfp_mask, int nr_pages, int *nr_mapped)
{
unsigned long ratio;
- /* This is bogus for ZONE_HIGHMEM? */
- if (kmem_cache_reap(gfp_mask) >= nr_pages)
- return 0;
-
/*
* Try to keep the active list 2/3 of the size of the cache. And
* make sure that refill_inactive is given a decent number of pages.
@@ -547,7 +552,8 @@ shrink_zone(struct zone *zone, int max_scan,
atomic_sub(SWAP_CLUSTER_MAX, &zone->refill_counter);
refill_inactive_zone(zone, SWAP_CLUSTER_MAX);
}
- nr_pages = shrink_cache(nr_pages, zone, gfp_mask, max_scan);
+ nr_pages = shrink_cache(nr_pages, zone, gfp_mask,
+ max_scan, nr_mapped);
return nr_pages;
}
@@ -557,6 +563,9 @@ shrink_caches(struct zone *classzone, int priority,
{
struct zone *first_classzone;
struct zone *zone;
+ int ratio;
+ int nr_mapped = 0;
+ int pages = nr_used_zone_pages();
first_classzone = classzone->zone_pgdat->node_zones;
for (zone = classzone; zone >= first_classzone; zone--) {
@@ -581,16 +590,28 @@ shrink_caches(struct zone *classzone, int priority,
max_scan = zone->nr_inactive >> priority;
if (max_scan < to_reclaim * 2)
max_scan = to_reclaim * 2;
- unreclaimed = shrink_zone(zone, max_scan, gfp_mask, to_reclaim);
+ unreclaimed = shrink_zone(zone, max_scan,
+ gfp_mask, to_reclaim, &nr_mapped);
nr_pages -= to_reclaim - unreclaimed;
*total_scanned += max_scan;
}
- shrink_dcache_memory(priority, gfp_mask);
- shrink_icache_memory(1, gfp_mask);
-#ifdef CONFIG_QUOTA
- shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
-#endif
+ /*
+ * Here we assume it costs one seek to replace an LRU page and that
+ * it also takes a seek to recreate a cache object. With this in
+ * mind, we age equal percentages of the LRU and the ageable caches.
+ * This should balance the seeks generated by these structures.
+ *
+ * NOTE: for now I do this for all zones. If we find this is too
+ * aggressive on large boxes, we may want to exclude ZONE_HIGHMEM.
+ *
+ * If we're encountering mapped pages on the LRU, then increase the
+ * pressure on slab to avoid swapping.
+ */
+ ratio = (pages / (*total_scanned + nr_mapped + 1)) + 1;
+ shrink_dcache_memory(ratio, gfp_mask);
+ shrink_icache_memory(ratio, gfp_mask);
+ shrink_dqcache_memory(ratio, gfp_mask);
return nr_pages;
}
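
The new pressure calculation ages the dentry, inode and quota caches by the
same fraction of themselves as the fraction of in-use pages the LRU scan
just covered, with mapped and swapcache pages counted twice (once in
total_scanned, once in nr_mapped) so slab feels extra pressure before
anonymous memory gets swapped. A worked example with assumed numbers:

static int example_ratio(void)
{
	int pages = 100000;		/* nr_used_zone_pages(), assumed */
	int total_scanned = 1500;	/* LRU pages scanned this pass */
	int nr_mapped = 500;		/* of those, mapped or swapcache */

	/*
	 * 100000 / 2001 = 49, plus 1 gives 50: the ageable caches are
	 * asked to age roughly 1/50th (2%) of their objects. The scan
	 * covered 1.5% of in-use pages, inflated to ~2% because mapped
	 * and swapcache pages count twice. The two "+ 1" terms guard
	 * against division by zero and a zero ratio.
	 */
	return (pages / (total_scanned + nr_mapped + 1)) + 1;
}
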