diff options
-rw-r--r--  Makefile                    |    2
-rw-r--r--  arch/i386/kernel/traps.c    |    1
-rw-r--r--  drivers/block/ll_rw_blk.c   |  135
-rw-r--r--  drivers/scsi/scsi_lib.c     |    9
-rw-r--r--  fs/inode.c                  |    5
-rw-r--r--  fs/proc/kcore.c             |    1
-rw-r--r--  include/linux/blkdev.h      |   16
-rw-r--r--  include/linux/fs.h          |    9
-rw-r--r--  mm/filemap.c                |    3
-rw-r--r--  mm/vmalloc.c                |    1
10 files changed, 141 insertions, 41 deletions
@@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 1 -EXTRAVERSION =-pre5 +EXTRAVERSION =-pre6 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 158d183b8c17..7c02813afc8c 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -23,6 +23,7 @@ #include <linux/delay.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/highmem.h> #ifdef CONFIG_MCA #include <linux/mca.h> diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 9ac560a6c054..f4e41801bb67 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -19,6 +19,7 @@ #include <linux/config.h> #include <linux/locks.h> #include <linux/mm.h> +#include <linux/swap.h> #include <linux/init.h> #include <linux/smp_lock.h> @@ -118,6 +119,19 @@ int * max_readahead[MAX_BLKDEV]; */ int * max_sectors[MAX_BLKDEV]; +/* + * queued sectors for all devices, used to make sure we don't fill all + * of memory with locked buffers + */ +atomic_t queued_sectors; + +/* + * high and low watermark for above + */ +static int high_queued_sectors, low_queued_sectors; +static int batch_requests, queue_nr_requests; +static DECLARE_WAIT_QUEUE_HEAD(blk_buffers_wait); + static inline int get_max_sectors(kdev_t dev) { if (!max_sectors[MAJOR(dev)]) @@ -185,7 +199,7 @@ static int __blk_cleanup_queue(struct list_head *head) **/ void blk_cleanup_queue(request_queue_t * q) { - int count = QUEUE_NR_REQUESTS; + int count = queue_nr_requests; count -= __blk_cleanup_queue(&q->request_freelist[READ]); count -= __blk_cleanup_queue(&q->request_freelist[WRITE]); @@ -385,7 +399,7 @@ static void blk_init_free_list(request_queue_t *q) /* * Divide requests in half between read and write */ - for (i = 0; i < QUEUE_NR_REQUESTS; i++) { + for (i = 0; i < queue_nr_requests; i++) { rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; @@ -559,14 
+573,12 @@ inline void drive_stat_acct (kdev_t dev, int rw, /* * add-request adds a request to the linked list. - * It disables interrupts (acquires the request spinlock) so that it can muck - * with the request-lists in peace. Thus it should be called with no spinlocks - * held. + * io_request_lock is held and interrupts disabled, as we muck with the + * request queue list. * * By this point, req->cmd is always either READ/WRITE, never READA, * which is important for drive_stat_acct() above. */ - static inline void add_request(request_queue_t * q, struct request * req, struct list_head *insert_here) { @@ -622,9 +634,17 @@ void inline blkdev_release_request(struct request *req) req->q = NULL; /* - * Request may not have originated from ll_rw_blk + * Request may not have originated from ll_rw_blk. if not, + * asumme it has free buffers and check waiters */ if (q) { + /* + * we've released enough buffers to start I/O again + */ + if (waitqueue_active(&blk_buffers_wait) + && atomic_read(&queued_sectors) < low_queued_sectors) + wake_up(&blk_buffers_wait); + if (!list_empty(&q->request_freelist[rw])) { blk_refill_freelist(q, rw); list_add(&req->table, &q->request_freelist[rw]); @@ -637,7 +657,7 @@ void inline blkdev_release_request(struct request *req) */ list_add(&req->table, &q->pending_freelist[rw]); - if (++q->pending_free[rw] >= (QUEUE_NR_REQUESTS >> 4)) { + if (++q->pending_free[rw] >= batch_requests) { int wake_up = q->pending_free[rw]; blk_refill_freelist(q, rw); wake_up_nr(&q->wait_for_request, wake_up); @@ -669,7 +689,7 @@ static void attempt_merge(request_queue_t * q, * will have been updated to the appropriate number, * and we shouldn't do it here too. 
*/ - if(!q->merge_requests_fn(q, req, next, max_segments)) + if (!q->merge_requests_fn(q, req, next, max_segments)) return; q->elevator.elevator_merge_req_fn(req, next); @@ -755,13 +775,13 @@ static int __make_request(request_queue_t * q, int rw, max_sectors = get_max_sectors(bh->b_rdev); again: + head = &q->queue_head; /* * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic */ spin_lock_irq(&io_request_lock); - head = &q->queue_head; insert_here = head->prev; if (list_empty(head)) { q->plug_device_fn(q, bh->b_rdev); /* is atomic */ @@ -780,6 +800,7 @@ again: req->bhtail->b_reqnext = bh; req->bhtail = bh; req->nr_sectors = req->hard_nr_sectors += count; + blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); attempt_back_merge(q, req, max_sectors, max_segments); goto out; @@ -794,6 +815,7 @@ again: req->current_nr_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; + blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); attempt_front_merge(q, head, req, max_sectors, max_segments); goto out; @@ -817,10 +839,9 @@ again: } /* - * Grab a free request from the freelist. Read first try their - * own queue - if that is empty, we steal from the write list. - * Writes must block if the write list is empty, and read aheads - * are not crucial. + * Grab a free request from the freelist - if that is empty, check + * if we are doing read ahead and abort instead of blocking for + * a free slot. 
*/ get_rq: if (freereq) { @@ -849,6 +870,7 @@ get_rq: req->bh = bh; req->bhtail = bh; req->rq_dev = bh->b_rdev; + blk_started_io(count); add_request(q, req, insert_here); out: if (freereq) @@ -901,13 +923,13 @@ void generic_make_request (int rw, struct buffer_head * bh) int major = MAJOR(bh->b_rdev); request_queue_t *q; - if (!bh->b_end_io) BUG(); + if (!bh->b_end_io) + BUG(); + if (blk_size[major]) { unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1; - unsigned int sector, count; - - count = bh->b_size >> 9; - sector = bh->b_rsector; + unsigned long sector = bh->b_rsector; + unsigned int count = bh->b_size >> 9; if (maxsector < count || maxsector - count < sector) { bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); @@ -918,7 +940,7 @@ void generic_make_request (int rw, struct buffer_head * bh) when mounting a device. */ printk(KERN_INFO "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%d, limit=%d\n", + printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", kdevname(bh->b_rdev), rw, (sector + count)>>1, blk_size[major][MINOR(bh->b_rdev)]); @@ -945,14 +967,13 @@ void generic_make_request (int rw, struct buffer_head * bh) buffer_IO_error(bh); break; } - } - while (q->make_request_fn(q, rw, bh)); + } while (q->make_request_fn(q, rw, bh)); } /** * submit_bh: submit a buffer_head to the block device later for I/O - * @rw: whether to %READ or %WRITE, or mayve to %READA (read ahead) + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) * @bh: The &struct buffer_head which describes the I/O * * submit_bh() is very similar in purpose to generic_make_request(), and @@ -975,7 +996,7 @@ void submit_bh(int rw, struct buffer_head * bh) * further remap this. 
*/ bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * (bh->b_size>>9); + bh->b_rsector = bh->b_blocknr * (bh->b_size >> 9); generic_make_request(rw, bh); @@ -1050,8 +1071,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) /* Verify requested block sizes. */ for (i = 0; i < nr; i++) { - struct buffer_head *bh; - bh = bhs[i]; + struct buffer_head *bh = bhs[i]; if (bh->b_size % correct_size) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", @@ -1068,8 +1088,17 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) } for (i = 0; i < nr; i++) { - struct buffer_head *bh; - bh = bhs[i]; + struct buffer_head *bh = bhs[i]; + + /* + * don't lock any more buffers if we are above the high + * water mark. instead start I/O on the queued stuff. + */ + if (atomic_read(&queued_sectors) >= high_queued_sectors) { + run_task_queue(&tq_disk); + wait_event(blk_buffers_wait, + atomic_read(&queued_sectors) < low_queued_sectors); + } /* Only one thread can actually submit the I/O. */ if (test_and_set_bit(BH_Lock, &bh->b_state)) @@ -1132,6 +1161,7 @@ int end_that_request_first (struct request *req, int uptodate, char *name) if ((bh = req->bh) != NULL) { nsect = bh->b_size >> 9; + blk_finished_io(nsect); req->bh = bh->b_reqnext; bh->b_reqnext = NULL; bh->b_end_io(bh, uptodate); @@ -1161,9 +1191,12 @@ void end_that_request_last(struct request *req) blkdev_release_request(req); } +#define MB(kb) ((kb) << 10) + int __init blk_dev_init(void) { struct blk_dev_struct *dev; + int total_ram; request_cachep = kmem_cache_create("blkdev_requests", sizeof(struct request), @@ -1178,6 +1211,51 @@ int __init blk_dev_init(void) memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); memset(max_sectors, 0, sizeof(max_sectors)); + + atomic_set(&queued_sectors, 0); + total_ram = nr_free_pages() << (PAGE_SHIFT - 10); + + /* + * Try to keep 128MB max hysteris. 
If not possible, + * use half of RAM + */ + high_queued_sectors = (total_ram * 2) / 3; + low_queued_sectors = high_queued_sectors - MB(128); + if (low_queued_sectors < 0) + low_queued_sectors = total_ram / 2; + + /* + * for big RAM machines (>= 384MB), use more for I/O + */ + if (total_ram >= MB(384)) { + high_queued_sectors = (total_ram * 4) / 5; + low_queued_sectors = high_queued_sectors - MB(128); + } + + /* + * make it sectors (512b) + */ + high_queued_sectors <<= 1; + low_queued_sectors <<= 1; + + /* + * Scale free request slots per queue too + */ + total_ram = (total_ram + MB(32) - 1) & ~(MB(32) - 1); + if ((queue_nr_requests = total_ram >> 9) > QUEUE_NR_REQUESTS) + queue_nr_requests = QUEUE_NR_REQUESTS; + + /* + * adjust batch frees according to queue length, with upper limit + */ + if ((batch_requests = queue_nr_requests >> 3) > 32) + batch_requests = 32; + + printk("block: queued sectors max/low %dkB/%dkB, %d slots per queue\n", + high_queued_sectors / 2, + low_queued_sectors / 2, + queue_nr_requests); + #ifdef CONFIG_AMIGA_Z2RAM z2_init(); #endif @@ -1300,3 +1378,4 @@ EXPORT_SYMBOL(blk_queue_make_request); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); +EXPORT_SYMBOL(queued_sectors); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 04ccfe6d6d75..dcde48437493 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -362,6 +362,7 @@ static Scsi_Cmnd *__scsi_end_request(Scsi_Cmnd * SCpnt, struct request *req; struct buffer_head *bh; Scsi_Device * SDpnt; + int nsect; ASSERT_LOCK(&io_request_lock, 0); @@ -373,11 +374,13 @@ static Scsi_Cmnd *__scsi_end_request(Scsi_Cmnd * SCpnt, } do { if ((bh = req->bh) != NULL) { + nsect = bh->b_size >> 9; + blk_finished_io(nsect); req->bh = bh->b_reqnext; - req->nr_sectors -= bh->b_size >> 9; - req->sector += bh->b_size >> 9; + req->nr_sectors -= nsect; + req->sector += nsect; bh->b_reqnext = NULL; - sectors -= bh->b_size >> 
9; + sectors -= nsect; bh->b_end_io(bh, uptodate); if ((bh = req->bh) != NULL) { req->current_nr_sectors = bh->b_size >> 9; diff --git a/fs/inode.c b/fs/inode.c index c85ed6426331..7939267f4502 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -134,12 +134,17 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block * sb = inode->i_sb; + if (sb) { /* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */ if (flags & (I_DIRTY | I_DIRTY_SYNC)) { if (sb->s_op && sb->s_op->dirty_inode) sb->s_op->dirty_inode(inode); } + /* avoid the locking if we can */ + if ((inode->i_state & flags) != flags) { + return ; + } spin_lock(&inode_lock); if ((inode->i_state & flags) != flags) { inode->i_state |= flags; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 00c16769e002..f95e9f6241cc 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -17,6 +17,7 @@ #include <linux/elf.h> #include <linux/elfcore.h> #include <linux/vmalloc.h> +#include <linux/highmem.h> #include <asm/uaccess.h> #include <asm/io.h> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0ea8f7064ba7..01cd38a8368e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -64,9 +64,10 @@ typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); typedef void (unplug_device_fn) (void *q); /* - * Default nr free requests per queue + * Default nr free requests per queue, ll_rw_blk will scale it down + * according to available RAM at init time */ -#define QUEUE_NR_REQUESTS 512 +#define QUEUE_NR_REQUESTS 8192 struct request_queue { @@ -176,6 +177,8 @@ extern int * max_sectors[MAX_BLKDEV]; extern int * max_segments[MAX_BLKDEV]; +extern atomic_t queued_sectors; + #define MAX_SEGMENTS 128 #define MAX_SECTORS (MAX_SEGMENTS*8) @@ -203,5 +206,14 @@ static inline int get_hardsect_size(kdev_t dev) return 512; } +#define blk_finished_io(nsects) \ + atomic_sub(nsects, 
&queued_sectors); \ + if (atomic_read(&queued_sectors) < 0) { \ + printk("block: queued_sectors < 0\n"); \ + atomic_set(&queued_sectors, 0); \ + } + +#define blk_started_io(nsects) \ + atomic_add(nsects, &queued_sectors); #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index f2d99107bf79..686aef31a600 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -825,20 +825,17 @@ struct super_operations { extern void __mark_inode_dirty(struct inode *, int); static inline void mark_inode_dirty(struct inode *inode) { - if ((inode->i_state & I_DIRTY) != I_DIRTY) - __mark_inode_dirty(inode, I_DIRTY); + __mark_inode_dirty(inode, I_DIRTY); } static inline void mark_inode_dirty_sync(struct inode *inode) { - if (!(inode->i_state & I_DIRTY_SYNC)) - __mark_inode_dirty(inode, I_DIRTY_SYNC); + __mark_inode_dirty(inode, I_DIRTY_SYNC); } static inline void mark_inode_dirty_pages(struct inode *inode) { - if (inode && !(inode->i_state & I_DIRTY_PAGES)) - __mark_inode_dirty(inode, I_DIRTY_PAGES); + __mark_inode_dirty(inode, I_DIRTY_PAGES); } struct dquot_operations { diff --git a/mm/filemap.c b/mm/filemap.c index cd97dc77f5ad..4c89ad3e9156 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -143,7 +143,8 @@ void __set_page_dirty(struct page *page) list_add(&page->list, &mapping->dirty_pages); spin_unlock(&pagecache_lock); - mark_inode_dirty_pages(mapping->host); + if (mapping->host) + mark_inode_dirty_pages(mapping->host); } /** diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 62ce5f1ffed8..93edab662caa 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -9,6 +9,7 @@ #include <linux/malloc.h> #include <linux/vmalloc.h> #include <linux/spinlock.h> +#include <linux/highmem.h> #include <linux/smp_lock.h> #include <asm/uaccess.h> |
