| author    | Linus Torvalds <torvalds@home.transmeta.com> | 2002-10-13 05:55:22 -0700 |
| committer | Linus Torvalds <torvalds@home.transmeta.com> | 2002-10-13 05:55:22 -0700 |
| commit    | 71660e156c8516c75e63d40d9f6c19af82340071 (patch) | |
| tree      | 3aaaee33be8ccec92e6944a03e3588e090e21a0f | |
| parent    | 31c96625109bb9deb166cfd3fbe08d0a92ad98fa (diff) | |
| parent    | 2dcb8ff9ea7bfdc161eec1eeb8f94c2ba5c3c8a8 (diff) | |
Merge bk://linuxusb.bkbits.net/linus-2.5
into home.transmeta.com:/home/torvalds/v2.5/linux
| -rw-r--r-- | arch/cris/drivers/examples/kiobuftest.c | 111 |
| -rw-r--r-- | arch/i386/mm/discontig.c | 1 |
| -rw-r--r-- | drivers/block/Config.in | 3 |
| -rw-r--r-- | drivers/block/ll_rw_blk.c | 2 |
| -rw-r--r-- | drivers/media/video/bttv-risc.c | 1 |
| -rw-r--r-- | drivers/media/video/bttvp.h | 1 |
| -rw-r--r-- | drivers/media/video/video-buf.c | 109 |
| -rw-r--r-- | drivers/media/video/video-buf.h | 8 |
| -rw-r--r-- | fs/Makefile | 2 |
| -rw-r--r-- | fs/aio.c | 7 |
| -rw-r--r-- | fs/bio.c | 125 |
| -rw-r--r-- | fs/block_dev.c | 1 |
| -rw-r--r-- | fs/buffer.c | 60 |
| -rw-r--r-- | fs/dcache.c | 50 |
| -rw-r--r-- | fs/dquot.c | 28 |
| -rw-r--r-- | fs/fcntl.c | 1 |
| -rw-r--r-- | fs/file_table.c | 1 |
| -rw-r--r-- | fs/inode.c | 49 |
| -rw-r--r-- | fs/iobuf.c | 125 |
| -rw-r--r-- | fs/open.c | 1 |
| -rw-r--r-- | fs/xfs/linux/xfs_aops.c | 1 |
| -rw-r--r-- | fs/xfs/linux/xfs_ioctl.c | 2 |
| -rw-r--r-- | include/linux/buffer_head.h | 1 |
| -rw-r--r-- | include/linux/dcache.h | 11 |
| -rw-r--r-- | include/linux/iobuf.h | 88 |
| -rw-r--r-- | include/linux/mm.h | 23 |
| -rw-r--r-- | init/main.c | 1 |
| -rw-r--r-- | kernel/ksyms.c | 16 |
| -rw-r--r-- | mm/filemap.c | 1 |
| -rw-r--r-- | mm/memory.c | 221 |
| -rw-r--r-- | mm/msync.c | 5 |
| -rw-r--r-- | mm/page_alloc.c | 2 |
| -rw-r--r-- | mm/vmscan.c | 119 |
33 files changed, 271 insertions, 906 deletions
diff --git a/arch/cris/drivers/examples/kiobuftest.c b/arch/cris/drivers/examples/kiobuftest.c
deleted file mode 100644
index 784418f9c4d6..000000000000
--- a/arch/cris/drivers/examples/kiobuftest.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Example showing how to pin down a range of virtual pages from user-space
- * to be able to do for example DMA directly into them.
- *
- * It is necessary because the pages the virtual pointers reference, might
- * not exist in memory (could be mapped to the zero-page, filemapped etc)
- * and DMA cannot trigger the MMU to force them in (and would have time
- * contraints making it impossible to wait for it anyway).
- *
- * Author: Bjorn Wesen
- *
- * $Log: kiobuftest.c,v $
- * Revision 1.1.1.1  2001/12/17 13:59:27  bjornw
- * Import of Linux 2.5.1
- *
- * Revision 1.2  2001/02/27 13:52:50  bjornw
- * malloc.h -> slab.h
- *
- * Revision 1.1  2001/01/19 15:57:49  bjornw
- * Example of how to do direct HW -> user-mode DMA
- *
- *
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/iobuf.h>
-
-#define KIOBUFTEST_MAJOR 124  /* in the local range, experimental */
-
-static ssize_t
-kiobuf_read(struct file *filp, char *buf, size_t len, loff_t *ppos)
-{
-	struct kiobuf *iobuf;
-	int res, i;
-
-	/* Make a kiobuf that maps the entire length the reader has given
-	 * us
-	 */
-
-	res = alloc_kiovec(1, &iobuf);
-	if (res)
-		return res;
-
-	if ((res = map_user_kiobuf(READ, iobuf, (unsigned long)buf, len))) {
-		printk("map_user_kiobuf failed, return %d\n", res);
-		return res;
-	}
-
-	/* At this point, the virtual area buf[0] -> buf[len-1] will
-	 * have corresponding pages mapped in physical memory and locked
-	 * until we unmap the kiobuf. They cannot be swapped out or moved
-	 * around.
-	 */
-
-	printk("nr_pages == %d\noffset == %d\nlength == %d\n",
-	       iobuf->nr_pages, iobuf->offset, iobuf->length);
-
-	for (i = 0; i < iobuf->nr_pages; i++) {
-		printk("page_add(maplist[%d]) == 0x%x\n", i,
-		       page_address(iobuf->maplist[i]));
-	}
-
-	/* This is the place to create the necessary scatter-gather vector
-	 * for the DMA using the iobuf->maplist array and page_address
-	 * (don't forget __pa if the DMA needs the actual physical DRAM address)
-	 * and run it.
-	 */
-
-
-
-
-	/* Release the mapping and exit */
-
-	unmap_kiobuf(iobuf); /* The unlock_kiobuf is implicit here */
-
-	return len;
-}
-
-
-static struct file_operations kiobuf_fops = {
-	owner:	THIS_MODULE,
-	read:	kiobuf_read
-};
-
-static int __init
-kiobuftest_init(void)
-{
-	int res;
-
-	/* register char device */
-
-	res = register_chrdev(KIOBUFTEST_MAJOR, "kiobuftest", &kiobuf_fops);
-	if (res < 0) {
-		printk(KERN_ERR "kiobuftest: couldn't get a major number.\n");
-		return res;
-	}
-
-	printk("Initializing kiobuf-test device\n");
-}
-
-module_init(kiobuftest_init);
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 1ff190bd68b4..ce54c1886dbc 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -70,6 +70,7 @@ static void __init allocate_pgdat(int nid)
 	node_datasz = PFN_UP(sizeof(struct pglist_data));
 	NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT));
 	min_low_pfn += node_datasz;
+	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 }

 /*
diff --git a/drivers/block/Config.in b/drivers/block/Config.in
index 6d2d99d8124c..49ba18434d92 100644
--- a/drivers/block/Config.in
+++ b/drivers/block/Config.in
@@ -49,7 +49,6 @@
 fi
 dep_bool '  Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM

 if [ "$CONFIG_X86" = "y" -o "$CONFIG_PPC32" = "y" ]; then
-#	bool 'Support for Large Block Devices' CONFIG_LBD
-	define_bool CONFIG_LBD y
+	bool 'Support for Large Block Devices' CONFIG_LBD
 fi
 endmenu
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 276189c6957a..ea56c1d8456c 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -1878,7 +1878,7 @@ end_io:
  */
 int submit_bio(int rw, struct bio *bio)
 {
-	int count = bio_sectors(bio) >> 1;
+	int count = bio_sectors(bio);

 	BUG_ON(!bio->bi_end_io);
 	BIO_BUG_ON(!bio->bi_size);
diff --git a/drivers/media/video/bttv-risc.c b/drivers/media/video/bttv-risc.c
index d63b5b48481c..ddb6f4328189 100644
--- a/drivers/media/video/bttv-risc.c
+++ b/drivers/media/video/bttv-risc.c
@@ -29,7 +29,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/iobuf.h>
 #include <linux/vmalloc.h>
 #include <linux/interrupt.h>
 #include <asm/page.h>
diff --git a/drivers/media/video/bttvp.h b/drivers/media/video/bttvp.h
index c58be937f95f..01443316cf39 100644
--- a/drivers/media/video/bttvp.h
+++ b/drivers/media/video/bttvp.h
@@ -31,7 +31,6 @@
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
 #include <linux/videodev.h>
-#include <linux/iobuf.h>
 #include <linux/pci.h>
 #include <asm/scatterlist.h>
diff --git a/drivers/media/video/video-buf.c b/drivers/media/video/video-buf.c
index d1c783401b29..03d3bbd1356a 100644
--- a/drivers/media/video/video-buf.c
+++ b/drivers/media/video/video-buf.c
@@ -18,8 +18,8 @@

 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/iobuf.h>
 #include <linux/vmalloc.h>
+#include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
@@ -65,32 +65,31 @@ videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages)
 	return NULL;
 }

-struct scatterlist*
-videobuf_iobuf_to_sg(struct kiobuf *iobuf)
+struct scatterlist *
+videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset)
 {
 	struct scatterlist *sglist;
 	int i = 0;
-
-	sglist = kmalloc(sizeof(struct scatterlist) * iobuf->nr_pages,
-			 GFP_KERNEL);
+
+	if (NULL == pages[0])
+		return NULL;
+	sglist = kmalloc(sizeof(*sglist) * nr_pages, GFP_KERNEL);
 	if (NULL == sglist)
 		return NULL;
-	memset(sglist,0,sizeof(struct scatterlist) * iobuf->nr_pages);
+	memset(sglist, 0, sizeof(*sglist) * nr_pages);

-	if (NULL == iobuf->maplist[0])
-		goto err;
-	if (PageHighMem(iobuf->maplist[0]))
+	if (PageHighMem(pages[0]))
 		/* DMA to highmem pages might not work */
 		goto err;
-	sglist[0].page   = iobuf->maplist[0];
-	sglist[0].offset = iobuf->offset;
-	sglist[0].length = PAGE_SIZE - iobuf->offset;
-	for (i = 1; i < iobuf->nr_pages; i++) {
-		if (NULL == iobuf->maplist[i])
+	sglist[0].page   = pages[0];
+	sglist[0].offset = offset;
+	sglist[0].length = PAGE_SIZE - offset;
+	for (i = 1; i < nr_pages; i++) {
+		if (NULL == pages[i])
 			goto err;
-		if (PageHighMem(iobuf->maplist[i]))
+		if (PageHighMem(pages[i]))
 			goto err;
-		sglist[i].page   = iobuf->maplist[i];
+		sglist[i].page   = pages[i];
 		sglist[i].length = PAGE_SIZE;
 	}
 	return sglist;
@@ -100,6 +99,30 @@ videobuf_iobuf_to_sg(struct kiobuf *iobuf)
 	return NULL;
 }

+int videobuf_lock(struct page **pages, int nr_pages)
+{
+	int i;
+
+	for (i = 0; i < nr_pages; i++)
+		if (TestSetPageLocked(pages[i]))
+			goto err;
+	return 0;
+
+ err:
+	while (i > 0)
+		unlock_page(pages[--i]);
+	return -EINVAL;
+}
+
+int videobuf_unlock(struct page **pages, int nr_pages)
+{
+	int i;
+
+	for (i = 0; i < nr_pages; i++)
+		unlock_page(pages[i]);
+	return 0;
+}
+
 /* --------------------------------------------------------------------- */

 int videobuf_dma_init_user(struct videobuf_dmabuf *dma, int direction,
@@ -113,14 +136,21 @@ int videobuf_dma_init_user(struct videobuf_dmabuf *dma, int direction,
 	case PCI_DMA_TODEVICE:   rw = WRITE; break;
 	default:                 BUG();
 	}
-	if (0 != (err = alloc_kiovec(1,&dma->iobuf)))
-		return err;
-	if (0 != (err = map_user_kiobuf(rw, dma->iobuf, data, size))) {
-		dprintk(1,"map_user_kiobuf: %d\n",err);
-		return err;
-	}
-	dma->nr_pages = dma->iobuf->nr_pages;
-	return 0;
+
+	dma->offset   = data & PAGE_MASK;
+	dma->nr_pages = ((((data+size) & ~PAGE_MASK) -
+			  (data & ~PAGE_MASK)) >> PAGE_SHIFT) +1;
+	dma->pages = kmalloc(dma->nr_pages * sizeof(struct page*),
+			     GFP_KERNEL);
+	if (NULL == dma->pages)
+		return -ENOMEM;
+	down_read(&current->mm->mmap_sem);
+	err = get_user_pages(current,current->mm,
+			     data, dma->nr_pages,
+			     rw == READ, 0, /* don't force */
+			     dma->pages, NULL);
+	up_read(&current->mm->mmap_sem);
+	return err;
 }

 int videobuf_dma_init_kernel(struct videobuf_dmabuf *dma, int direction,
@@ -144,13 +174,15 @@ int videobuf_dma_pci_map(struct pci_dev *dev, struct videobuf_dmabuf *dma)
 	if (0 == dma->nr_pages)
 		BUG();
-	if (dma->iobuf) {
-		if (0 != (err = lock_kiovec(1,&dma->iobuf,1))) {
-			dprintk(1,"lock_kiovec: %d\n",err);
+	if (dma->pages) {
+		if (0 != (err = videobuf_lock(dma->pages, dma->nr_pages))) {
+			dprintk(1,"videobuf_lock_pages: %d\n",err);
 			return err;
 		}
-		dma->sglist = videobuf_iobuf_to_sg(dma->iobuf);
+		dma->sglist = videobuf_pages_to_sg(dma->pages, dma->nr_pages,
+						   dma->offset);
 	}
+
 	if (dma->vmalloc) {
 		dma->sglist = videobuf_vmalloc_to_sg
 			(dma->vmalloc,dma->nr_pages);
@@ -160,7 +192,7 @@ int videobuf_dma_pci_map(struct pci_dev *dev, struct videobuf_dmabuf *dma)
 		return -ENOMEM;
 	}
 	dma->sglen = pci_map_sg(dev,dma->sglist,dma->nr_pages,
-			dma->direction);
+				dma->direction);
 	return 0;
 }
@@ -182,8 +214,8 @@ int videobuf_dma_pci_unmap(struct pci_dev *dev, struct videobuf_dmabuf *dma)
 	kfree(dma->sglist);
 	dma->sglist = NULL;
 	dma->sglen = 0;
-	if (dma->iobuf)
-		unlock_kiovec(1,&dma->iobuf);
+	if (dma->pages)
+		videobuf_lock(dma->pages, dma->nr_pages);
 	return 0;
 }
@@ -192,11 +224,14 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma)
 	if (dma->sglen)
 		BUG();
-	if (dma->iobuf) {
-		unmap_kiobuf(dma->iobuf);
-		free_kiovec(1,&dma->iobuf);
-		dma->iobuf = NULL;
+	if (dma->pages) {
+		int i;
+		for (i=0; i < dma->nr_pages; i++)
+			page_cache_release(dma->pages[i]);
+		kfree(dma->pages);
+		dma->pages = NULL;
 	}
+
 	if (dma->vmalloc) {
 		vfree(dma->vmalloc);
 		dma->vmalloc = NULL;
@@ -959,6 +994,7 @@ int videobuf_mmap_mapper(struct vm_area_struct *vma,
 	map->q     = q;
 	vma->vm_ops          = &videobuf_vm_ops;
 	vma->vm_flags       |= VM_DONTEXPAND;
+	vma->vm_flags       &= ~VM_IO; /* using shared anonymous pages */
 	vma->vm_private_data = map;
 	dprintk(1,"mmap %p: %08lx-%08lx pgoff %08lx bufs %d-%d\n",
 		map,vma->vm_start,vma->vm_end,vma->vm_pgoff,first,last);
@@ -972,7 +1008,6 @@ int videobuf_mmap_mapper(struct vm_area_struct *vma,
 /* --------------------------------------------------------------------- */

 EXPORT_SYMBOL_GPL(videobuf_vmalloc_to_sg);
-EXPORT_SYMBOL_GPL(videobuf_iobuf_to_sg);

 EXPORT_SYMBOL_GPL(videobuf_dma_init_user);
 EXPORT_SYMBOL_GPL(videobuf_dma_init_kernel);
diff --git a/drivers/media/video/video-buf.h b/drivers/media/video/video-buf.h
index 0e2c5860b953..3d8710848ca1 100644
--- a/drivers/media/video/video-buf.h
+++ b/drivers/media/video/video-buf.h
@@ -28,11 +28,12 @@
 struct scatterlist* videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages);

 /*
- * Return a scatterlist for a locked iobuf (NULL on errors).  Memory
+ * Return a scatterlist for an array of userpages (NULL on errors).  Memory
  * for the scatterlist is allocated using kmalloc.  The caller must
  * free the memory.
  */
-struct scatterlist* videobuf_iobuf_to_sg(struct kiobuf *iobuf);
+struct scatterlist *videobuf_pages_to_sg(struct page **pages, int nr_pages,
+					 int offset);

 /* --------------------------------------------------------------------- */
@@ -57,7 +58,8 @@
 struct videobuf_dmabuf {
 	/* for userland buffer */
-	struct kiobuf       *iobuf;
+	struct page         **pages;
+	int                 offset;

 	/* for kernel buffers */
 	void                *vmalloc;
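The videobuf conversion above is the pattern that replaces kiobufs throughout this merge: pin the user pages with get_user_pages() under mmap_sem, build a scatterlist from the resulting page array, and drop each page reference when the I/O is done. A minimal sketch of that pin/release cycle against the 2.5-era API used in the patch (pin_user_range, unpin_user_range, and their parameters are illustrative names, not part of the patch):

```c
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>

/*
 * Pin the user range [buf, buf+len) for device I/O.  write == 1 when
 * the device will store into the pages (rw == READ in videobuf terms).
 */
static int pin_user_range(unsigned long buf, size_t len, int write,
			  struct page ***pagesp, int *nr_pagesp)
{
	int nr_pages = (buf + len + PAGE_SIZE - 1)/PAGE_SIZE - buf/PAGE_SIZE;
	struct page **pages;
	int err;

	pages = kmalloc(nr_pages * sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	err = get_user_pages(current, current->mm, buf, nr_pages,
			     write, 0 /* don't force */, pages, NULL);
	up_read(&current->mm->mmap_sem);

	if (err < 0) {
		kfree(pages);
		return err;
	}
	*pagesp = pages;
	*nr_pagesp = err;	/* may be fewer pages than requested */
	return 0;
}

/* Release after I/O completes: each pinned page holds one reference. */
static void unpin_user_range(struct page **pages, int nr_pages)
{
	int i;

	for (i = 0; i < nr_pages; i++)
		page_cache_release(pages[i]);
	kfree(pages);
}
```

Unlike map_user_kiobuf(), there is no persistent mapping object: the struct page array is the whole state, which is why struct videobuf_dmabuf now only needs a pages pointer and an offset.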
diff --git a/fs/Makefile b/fs/Makefile
index 7f349ff168ad..d902bdd8bda3 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@
 export-objs := open.o dcache.o buffer.o bio.o inode.o dquot.o mpage.o aio.o \
 obj-y :=	open.o read_write.o devices.o file_table.o buffer.o \
 		bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \
 		namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
-		dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \
+		dcache.o inode.o attr.o bad_inode.o file.o dnotify.o \
 		filesystems.o namespace.o seq_file.o xattr.o libfs.o \
 		fs-writeback.o mpage.o direct-io.o aio.o
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -9,6 +9,7 @@
  *	See ../COPYING for licensing terms.
  */
 #include <linux/kernel.h>
+#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/aio_abi.h>
@@ -21,15 +22,9 @@
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
-#include <linux/vmalloc.h>
-#include <linux/iobuf.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
-#include <linux/brlock.h>
 #include <linux/aio.h>
-#include <linux/smp_lock.h>
-#include <linux/compiler.h>
-#include <linux/brlock.h>
 #include <linux/module.h>
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
diff --git a/fs/bio.c b/fs/bio.c
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -20,7 +20,7 @@
 #include <linux/bio.h>
 #include <linux/blk.h>
 #include <linux/slab.h>
-#include <linux/iobuf.h>
+#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mempool.h>
@@ -438,128 +438,6 @@ retry_segments:
 	return 0;
 }

-static int bio_end_io_kio(struct bio *bio, unsigned int bytes_done, int error)
-{
-	struct kiobuf *kio = (struct kiobuf *) bio->bi_private;
-
-	if (bio->bi_size)
-		return 1;
-
-	end_kio_request(kio, error);
-	bio_put(bio);
-	return 0;
-}
-
-/**
- * ll_rw_kio - submit a &struct kiobuf for I/O
- * @rw: %READ or %WRITE
- * @kio: the kiobuf to do I/O on
- * @bdev: target device
- * @sector: start location on disk
- *
- * Description:
- *   ll_rw_kio will map the page list inside the &struct kiobuf to
- *   &struct bio and queue them for I/O. The kiobuf given must describe
- *   a continous range of data, and must be fully prepared for I/O.
- **/
-void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t sector)
-{
-	int i, offset, size, err, map_i, total_nr_pages, nr_pages;
-	struct bio *bio;
-
-	err = 0;
-	if ((rw & WRITE) && bdev_read_only(bdev)) {
-		printk("ll_rw_bio: WRITE to ro device %s\n", bdevname(bdev));
-		err = -EPERM;
-		goto out;
-	}
-
-	if (!kio->nr_pages) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	/*
-	 * maybe kio is bigger than the max we can easily map into a bio.
-	 * if so, split it up in appropriately sized chunks.
-	 */
-	total_nr_pages = kio->nr_pages;
-	offset = kio->offset & ~PAGE_MASK;
-	size = kio->length;
-
-	atomic_set(&kio->io_count, 1);
-
-	map_i = 0;
-
-next_chunk:
-	nr_pages = BIO_MAX_PAGES;
-	if (nr_pages > total_nr_pages)
-		nr_pages = total_nr_pages;
-
-	atomic_inc(&kio->io_count);
-
-	/*
-	 * allocate bio and do initial setup
-	 */
-	if ((bio = bio_alloc(GFP_NOIO, nr_pages)) == NULL) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	bio->bi_sector = sector;
-	bio->bi_bdev = bdev;
-	bio->bi_idx = 0;
-	bio->bi_end_io = bio_end_io_kio;
-	bio->bi_private = kio;
-
-	for (i = 0; i < nr_pages; i++, map_i++) {
-		int nbytes = PAGE_SIZE - offset;
-
-		if (nbytes > size)
-			nbytes = size;
-
-		BUG_ON(kio->maplist[map_i] == NULL);
-
-		/*
-		 * if we can't add this page to the bio, submit for i/o
-		 * and alloc a new one if needed
-		 */
-		if (bio_add_page(bio, kio->maplist[map_i], nbytes, offset))
-			break;
-
-		/*
-		 * kiobuf only has an offset into the first page
-		 */
-		offset = 0;
-
-		sector += nbytes >> 9;
-		size -= nbytes;
-		total_nr_pages--;
-		kio->offset += nbytes;
-	}
-
-	submit_bio(rw, bio);
-
-	if (total_nr_pages)
-		goto next_chunk;
-
-	if (size) {
-		printk("ll_rw_kio: size %d left (kio %d)\n", size, kio->length);
-		BUG();
-	}
-
-out:
-	if (err)
-		kio->errno = err;
-
-	/*
-	 * final atomic_dec of io_count to match our initial setting of 1.
-	 * I/O may or may not have completed at this point, final completion
-	 * handler is only run on last decrement.
-	 */
-	end_kio_request(kio, !err);
-}
-
 /**
  * bio_endio - end I/O on a bio
  * @bio:	bio
@@ -662,7 +540,6 @@ module_init(init_bio);

 EXPORT_SYMBOL(bio_alloc);
 EXPORT_SYMBOL(bio_put);
-EXPORT_SYMBOL(ll_rw_kio);
 EXPORT_SYMBOL(bio_endio);
 EXPORT_SYMBOL(bio_init);
 EXPORT_SYMBOL(bio_copy);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3b95ff2d40a4..33fc669b7842 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -14,7 +14,6 @@
 #include <linux/major.h>
 #include <linux/devfs_fs_kernel.h>
 #include <linux/smp_lock.h>
-#include <linux/iobuf.h>
 #include <linux/highmem.h>
 #include <linux/blkdev.h>
 #include <linux/module.h>
diff --git a/fs/buffer.c b/fs/buffer.c
index 30c0adeec762..d024b78c3e60 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -28,7 +28,6 @@
 #include <linux/blkdev.h>
 #include <linux/file.h>
 #include <linux/quotaops.h>
-#include <linux/iobuf.h>
 #include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/writeback.h>
@@ -2300,65 +2299,6 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
 	return tmp.b_blocknr;
 }

-/*
- * Start I/O on a physical range of kernel memory, defined by a vector
- * of kiobuf structs (much like a user-space iovec list).
- *
- * The kiobuf must already be locked for IO.  IO is submitted
- * asynchronously: you need to check page->locked and page->uptodate.
- *
- * It is up to the caller to make sure that there are enough blocks
- * passed in to completely map the iobufs to disk.
- */
-int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
-	       struct block_device *bdev, sector_t b[], int size)
-{
-	int		transferred;
-	int		i;
-	int		err;
-	struct kiobuf *	iobuf;
-
-	if (!nr)
-		return 0;
-
-	/*
-	 * First, do some alignment and validity checks
-	 */
-	for (i = 0; i < nr; i++) {
-		iobuf = iovec[i];
-		if ((iobuf->offset & (size-1)) || (iobuf->length & (size-1)))
-			return -EINVAL;
-		if (!iobuf->nr_pages)
-			panic("brw_kiovec: iobuf not initialised");
-	}
-
-	/*
-	 * OK to walk down the iovec doing page IO on each page we find.
-	 */
-	for (i = 0; i < nr; i++) {
-		iobuf = iovec[i];
-		iobuf->errno = 0;
-
-		ll_rw_kio(rw, iobuf, bdev, b[i] * (size >> 9));
-	}
-
-	/*
-	 * now they are all submitted, wait for completion
-	 */
-	transferred = 0;
-	err = 0;
-	for (i = 0; i < nr; i++) {
-		iobuf = iovec[i];
-		kiobuf_wait_for_io(iobuf);
-		if (iobuf->errno && !err)
-			err = iobuf->errno;
-		if (!err)
-			transferred += iobuf->length;
-	}
-
-	return err ? err : transferred;
-}
-
 static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
 {
 	struct buffer_head *bh = bio->bi_private;
diff --git a/fs/dcache.c b/fs/dcache.c
index 4528be4d90d1..ef0871dbcdb2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -328,7 +328,7 @@ static inline void prune_one_dentry(struct dentry * dentry)
  * all the dentries are in use.
  */

-void prune_dcache(int count)
+static void prune_dcache(int count)
 {
 	spin_lock(&dcache_lock);
 	for (; count ; count--) {
@@ -572,25 +572,24 @@ void shrink_dcache_anon(struct list_head *head)
  * This is called from kswapd when we think we need some
  * more memory.
  */
-int shrink_dcache_memory(int ratio, unsigned int gfp_mask)
+static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
 {
-	int entries = dentry_stat.nr_dentry / ratio + 1;
-	/*
-	 * Nasty deadlock avoidance.
-	 *
-	 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
-	 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->
-	 * put_inode->ext2_discard_prealloc->ext2_free_blocks->lock_super->
-	 * DEADLOCK.
-	 *
-	 * We should make sure we don't hold the superblock lock over
-	 * block allocations, but for now:
-	 */
-	if (!(gfp_mask & __GFP_FS))
-		return 0;
-
-	prune_dcache(entries);
-	return entries;
+	if (nr) {
+		/*
+		 * Nasty deadlock avoidance.
+		 *
+		 * ext2_new_block->getblk->GFP->shrink_dcache_memory->
+		 * prune_dcache->prune_one_dentry->dput->dentry_iput->iput->
+		 * inode->i_sb->s_op->put_inode->ext2_discard_prealloc->
+		 * ext2_free_blocks->lock_super->DEADLOCK.
+		 *
+		 * We should make sure we don't hold the superblock lock over
+		 * block allocations, but for now:
+		 */
+		if (gfp_mask & __GFP_FS)
+			prune_dcache(nr);
+	}
+	return dentry_stat.nr_dentry;
 }

 #define NAME_ALLOC_LEN(len)	((len+16) & ~15)
@@ -1330,6 +1329,8 @@ static void __init dcache_init(unsigned long mempages)
 					 NULL, NULL);
 	if (!dentry_cache)
 		panic("Cannot create dentry cache");
+
+	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);

 #if PAGE_SHIFT < 13
 	mempages >>= (13 - PAGE_SHIFT);
@@ -1375,9 +1376,6 @@ kmem_cache_t *names_cachep;
 /* SLAB cache for file structures */
 kmem_cache_t *filp_cachep;

-/* SLAB cache for dquot structures */
-kmem_cache_t *dquot_cachep;
-
 EXPORT_SYMBOL(d_genocide);

 extern void bdev_cache_init(void);
@@ -1397,14 +1395,6 @@ void __init vfs_caches_init(unsigned long mempages)
 	if(!filp_cachep)
 		panic("Cannot create filp SLAB cache");

-#if defined (CONFIG_QUOTA)
-	dquot_cachep = kmem_cache_create("dquot",
-			sizeof(struct dquot), sizeof(unsigned long) * 4,
-			SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if (!dquot_cachep)
-		panic("Cannot create dquot SLAB cache");
-#endif
-
 	dcache_init(mempages);
 	inode_init(mempages);
 	files_init(mempages);
diff --git a/fs/dquot.c b/fs/dquot.c
index f97b3609c894..24d50ae34824 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -55,6 +55,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/time.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -481,14 +482,14 @@ static void prune_dqcache(int count)
  * more memory
  */

-int shrink_dqcache_memory(int ratio, unsigned int gfp_mask)
+static int shrink_dqcache_memory(int nr, unsigned int gfp_mask)
 {
-	int entries = dqstats.allocated_dquots / ratio + 1;
-
-	lock_kernel();
-	prune_dqcache(entries);
-	unlock_kernel();
-	return entries;
+	if (nr) {
+		lock_kernel();
+		prune_dqcache(nr);
+		unlock_kernel();
+	}
+	return dqstats.allocated_dquots;
 }

 /*
@@ -1490,6 +1491,9 @@ static ctl_table sys_table[] = {
 	{},
 };

+/* SLAB cache for dquot structures */
+kmem_cache_t *dquot_cachep;
+
 static int __init dquot_init(void)
 {
 	int i;
@@ -1499,9 +1503,17 @@ static int __init dquot_init(void)
 		INIT_LIST_HEAD(dquot_hash + i);
 	printk(KERN_NOTICE "VFS: Disk quotas v%s\n", __DQUOT_VERSION__);

+	dquot_cachep = kmem_cache_create("dquot",
+			sizeof(struct dquot), sizeof(unsigned long) * 4,
+			SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!dquot_cachep)
+		panic("Cannot create dquot SLAB cache");
+
+	set_shrinker(DEFAULT_SEEKS, shrink_dqcache_memory);
+
 	return 0;
 }
-__initcall(dquot_init);
+module_init(dquot_init);

 EXPORT_SYMBOL(register_quota_format);
 EXPORT_SYMBOL(unregister_quota_format);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 539711ef1061..c2fc83cdfed6 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -10,7 +10,6 @@
 #include <linux/dnotify.h>
 #include <linux/smp_lock.h>
 #include <linux/slab.h>
-#include <linux/iobuf.h>
 #include <linux/module.h>
 #include <linux/security.h>
diff --git a/fs/file_table.c b/fs/file_table.c
index d6093fc0b1b5..fe6c048c2bab 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -11,7 +11,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/smp_lock.h>
-#include <linux/iobuf.h>
 #include <linux/fs.h>
 #include <linux/security.h>
diff --git a/fs/inode.c b/fs/inode.c
index d56785889730..4f56d96031ea 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -243,22 +243,25 @@ void clear_inode(struct inode *inode)
  * Dispose-list gets a local list with local inodes in it, so it doesn't
  * need to worry about list corruption and SMP locks.
  */
-static void dispose_list(struct list_head * head)
+static void dispose_list(struct list_head *head)
 {
-	struct list_head * inode_entry;
-	struct inode * inode;
+	int nr_disposed = 0;
+
+	while (!list_empty(head)) {
+		struct inode *inode;

-	while ((inode_entry = head->next) != head)
-	{
-		list_del(inode_entry);
+		inode = list_entry(head->next, struct inode, i_list);
+		list_del(&inode->i_list);

-		inode = list_entry(inode_entry, struct inode, i_list);
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
 		clear_inode(inode);
 		destroy_inode(inode);
-		inodes_stat.nr_inodes--;
+		nr_disposed++;
 	}
+	spin_lock(&inode_lock);
+	inodes_stat.nr_inodes -= nr_disposed;
+	spin_unlock(&inode_lock);
 }

 /*
@@ -377,7 +380,7 @@ int invalidate_device(kdev_t dev, int do_sync)
 		!inode_has_buffers(inode))
 #define INODE(entry)	(list_entry(entry, struct inode, i_list))

-void prune_icache(int goal)
+static inline void prune_icache(int goal)
 {
 	LIST_HEAD(list);
 	struct list_head *entry, *freeable = &list;
@@ -417,23 +420,19 @@ void prune_icache(int goal)
  * This is called from kswapd when we think we need some
  * more memory.
  */
-int shrink_icache_memory(int ratio, unsigned int gfp_mask)
+static int shrink_icache_memory(int nr, unsigned int gfp_mask)
 {
-	int entries = inodes_stat.nr_inodes / ratio + 1;
-	/*
-	 * Nasty deadlock avoidance..
-	 *
-	 * We may hold various FS locks, and we don't
-	 * want to recurse into the FS that called us
-	 * in clear_inode() and friends..
-	 */
-	if (!(gfp_mask & __GFP_FS))
-		return 0;
-
-	prune_icache(entries);
-	return entries;
+	if (nr) {
+		/*
+		 * Nasty deadlock avoidance.  We may hold various FS locks,
+		 * and we don't want to recurse into the FS that called us
+		 * in clear_inode() and friends..
+		 */
+		if (gfp_mask & __GFP_FS)
+			prune_icache(nr);
+	}
+	return inodes_stat.nr_inodes;
 }
-EXPORT_SYMBOL(shrink_icache_memory);

 /*
  * Called with the inode lock held.
@@ -1226,4 +1225,6 @@ void __init inode_init(unsigned long mempages)
 					 NULL);
 	if (!inode_cachep)
 		panic("cannot create inode slab cache");
+
+	set_shrinker(DEFAULT_SEEKS, shrink_icache_memory);
 }
diff --git a/fs/iobuf.c b/fs/iobuf.c
deleted file mode 100644
index 62c44534c68a..000000000000
--- a/fs/iobuf.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * iobuf.c
- *
- * Keep track of the general-purpose IO-buffer structures used to track
- * abstract kernel-space io buffers.
- *
- */
-
-#include <linux/iobuf.h>
-#include <linux/slab.h>
-
-int end_kio_request(struct kiobuf *kiobuf, int uptodate)
-{
-	int ret = 1;
-
-	if ((!uptodate) && !kiobuf->errno)
-		kiobuf->errno = -EIO;
-
-	if (atomic_dec_and_test(&kiobuf->io_count)) {
-		ret = 0;
-		if (kiobuf->end_io)
-			kiobuf->end_io(kiobuf);
-		wake_up(&kiobuf->wait_queue);
-	}
-
-	return ret;
-}
-
-static void kiobuf_init(struct kiobuf *iobuf)
-{
-	init_waitqueue_head(&iobuf->wait_queue);
-	atomic_set(&iobuf->io_count, 0);
-	iobuf->array_len = KIO_STATIC_PAGES;
-	iobuf->maplist   = iobuf->map_array;
-	iobuf->nr_pages = 0;
-	iobuf->locked = 0;
-	iobuf->io_count.counter = 0;
-	iobuf->end_io = NULL;
-}
-
-int alloc_kiovec(int nr, struct kiobuf **bufp)
-{
-	int i;
-	struct kiobuf *iobuf;
-
-	for (i = 0; i < nr; i++) {
-		iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL);
-		if (!iobuf) {
-			free_kiovec(i, bufp);
-			return -ENOMEM;
-		}
-		kiobuf_init(iobuf);
-		bufp[i] = iobuf;
-	}
-
-	return 0;
-}
-
-void free_kiovec(int nr, struct kiobuf **bufp)
-{
-	int i;
-	struct kiobuf *iobuf;
-
-	for (i = 0; i < nr; i++) {
-		iobuf = bufp[i];
-		if (iobuf->locked)
-			unlock_kiovec(1, &iobuf);
-		if (iobuf->array_len > KIO_STATIC_PAGES)
-			kfree (iobuf->maplist);
-		kfree(bufp[i]);
-	}
-}
-
-int expand_kiobuf(struct kiobuf *iobuf, int wanted)
-{
-	struct page ** maplist;
-
-	if (iobuf->array_len >= wanted)
-		return 0;
-
-	maplist = (struct page **)
-		kmalloc(wanted * sizeof(struct page **), GFP_KERNEL);
-	if (!maplist)
-		return -ENOMEM;
-
-	/* Did it grow while we waited? */
-	if (iobuf->array_len >= wanted) {
-		kfree(maplist);
-		return 0;
-	}
-
-	memcpy (maplist, iobuf->maplist, iobuf->array_len * sizeof(struct page **));
-
-	if (iobuf->array_len > KIO_STATIC_PAGES)
-		kfree (iobuf->maplist);
-
-	iobuf->maplist   = maplist;
-	iobuf->array_len = wanted;
-	return 0;
-}
-
-
-void kiobuf_wait_for_io(struct kiobuf *kiobuf)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	if (atomic_read(&kiobuf->io_count) == 0)
-		return;
-
-	add_wait_queue(&kiobuf->wait_queue, &wait);
-repeat:
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	if (atomic_read(&kiobuf->io_count) != 0) {
-		blk_run_queues();
-		schedule();
-		if (atomic_read(&kiobuf->io_count) != 0)
-			goto repeat;
-	}
-	tsk->state = TASK_RUNNING;
-	remove_wait_queue(&kiobuf->wait_queue, &wait);
-}
-
-
-
diff --git a/fs/open.c b/fs/open.c
index 673d20cd1ee8..3e690b0cd50d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/tty.h>
-#include <linux/iobuf.h>
 #include <linux/namei.h>
 #include <linux/backing-dev.h>
 #include <linux/security.h>
diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c
index 184d173ee192..e749c3c3bbed 100644
--- a/fs/xfs/linux/xfs_aops.c
+++ b/fs/xfs/linux/xfs_aops.c
@@ -34,7 +34,6 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
-#include <linux/iobuf.h>

 STATIC int delalloc_convert(struct inode *, struct page *, int, int);
diff --git a/fs/xfs/linux/xfs_ioctl.c b/fs/xfs/linux/xfs_ioctl.c
index a2b5f0162ccd..5dbf4fd9debf 100644
--- a/fs/xfs/linux/xfs_ioctl.c
+++ b/fs/xfs/linux/xfs_ioctl.c
@@ -35,7 +35,6 @@
 #include <xfs_dfrag.h>
 #include <linux/dcache.h>
 #include <linux/namei.h>
-#include <linux/iobuf.h>

 extern int xfs_change_file_space(bhv_desc_t *, int,
@@ -605,6 +604,7 @@ xfs_ioctl(
 		 * it is set to the file system block size to
 		 * avoid having to do block zeroing on short writes.
 		 */
+#define KIO_MAX_ATOMIC_IO 512 /* FIXME: what do we really want here? */
 		da.d_maxiosz = XFS_FSB_TO_B(mp,
 				XFS_B_TO_FSBT(mp, KIO_MAX_ATOMIC_IO << 10));
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 71732e1216fc..0760d97cd6f9 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -32,7 +32,6 @@ enum bh_state_bits {
 #define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)

 struct page;
-struct kiobuf;
 struct buffer_head;
 struct address_space;
 typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 0abaaaa2c96d..71708edafce9 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -180,17 +180,6 @@ extern void shrink_dcache_parent(struct dentry *);
 extern void shrink_dcache_anon(struct list_head *);
 extern int d_invalidate(struct dentry *);

-/* dcache memory management */
-extern int shrink_dcache_memory(int, unsigned int);
-extern void prune_dcache(int);
-
-/* icache memory management (defined in linux/fs/inode.c) */
-extern int shrink_icache_memory(int, unsigned int);
-extern void prune_icache(int);
-
-/* quota cache memory management (defined in linux/fs/dquot.c) */
-extern int shrink_dqcache_memory(int, unsigned int);
-
 /* only used at mount-time */
 extern struct dentry * d_alloc_root(struct inode *);
diff --git a/include/linux/iobuf.h b/include/linux/iobuf.h
deleted file mode 100644
index fb147b5c48a7..000000000000
--- a/include/linux/iobuf.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * iobuf.h
- *
- * Defines the structures used to track abstract kernel-space io buffers.
- *
- */
-
-#ifndef __LINUX_IOBUF_H
-#define __LINUX_IOBUF_H
-
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/wait.h>
-#include <asm/atomic.h>
-
-/*
- * The kiobuf structure describes a physical set of pages reserved
- * locked for IO.  The reference counts on each page will have been
- * incremented, and the flags field will indicate whether or not we have
- * pre-locked all of the pages for IO.
- *
- * kiobufs may be passed in arrays to form a kiovec, but we must
- * preserve the property that no page is present more than once over the
- * entire iovec.
- */
-
-#define KIO_MAX_ATOMIC_IO	512 /* in kb */
-#define KIO_STATIC_PAGES	(KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
-#define KIO_MAX_SECTORS		(KIO_MAX_ATOMIC_IO * 2)
-
-/* The main kiobuf struct */
-
-struct kiobuf
-{
-	int		nr_pages;	/* Pages actually referenced */
-	int		array_len;	/* Space in the allocated lists */
-	int		offset;		/* Offset to start of valid data */
-	int		length;		/* Number of valid bytes of data */
-
-	/* Keep separate track of the physical addresses and page
-	 * structs involved.  If we do IO to a memory-mapped device
-	 * region, there won't necessarily be page structs defined for
-	 * every address. */
-
-	struct page **	maplist;
-
-	unsigned int	locked : 1;	/* If set, pages has been locked */
-
-	/* Always embed enough struct pages for atomic IO */
-	struct page *	map_array[KIO_STATIC_PAGES];
-	sector_t	blocks[KIO_MAX_SECTORS];
-
-	/* Dynamic state for IO completion: */
-	atomic_t	io_count;	/* IOs still in progress */
-	int		errno;		/* Status of completed IO */
-	void		(*end_io) (struct kiobuf *); /* Completion callback */
-	wait_queue_head_t wait_queue;
-};
-
-
-/* mm/memory.c */
-
-int	map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len);
-void	unmap_kiobuf(struct kiobuf *iobuf);
-int	lock_kiovec(int nr, struct kiobuf *iovec[], int wait);
-int	unlock_kiovec(int nr, struct kiobuf *iovec[]);
-void	mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes);
-
-/* fs/iobuf.c */
-
-int	end_kio_request(struct kiobuf *, int);
-void	simple_wakeup_kiobuf(struct kiobuf *);
-int	alloc_kiovec(int nr, struct kiobuf **);
-void	free_kiovec(int nr, struct kiobuf **);
-int	expand_kiobuf(struct kiobuf *, int);
-void	kiobuf_wait_for_io(struct kiobuf *);
-extern int alloc_kiobuf_bhs(struct kiobuf *);
-extern void free_kiobuf_bhs(struct kiobuf *);
-
-/* fs/buffer.c */
-
-int	brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
-		   struct block_device *bdev, sector_t [], int size);
-
-/* fs/bio.c */
-void	ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t block);
-
-#endif /* __LINUX_IOBUF_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a5107b5043f7..a6c66cc418ee 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -392,6 +392,29 @@
 extern int free_hugepages(struct vm_area_struct *);

 /*
+ * Prototype to add a shrinker callback for ageable caches.
+ *
+ * These functions are passed a count `nr_to_scan' and a gfpmask.  They should
+ * scan `nr_to_scan' objects, attempting to free them.
+ *
+ * The callback must return the number of objects which remain in the cache.
+ *
+ * The callback will be passed nr_to_scan == 0 when the VM is querying the
+ * cache size, so a fastpath for that case is appropriate.
+ */
+typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
+
+/*
+ * Add an aging callback.  The int is the number of 'seeks' it takes
+ * to recreate one of the objects that these functions age.
+ */
+
+#define DEFAULT_SEEKS 2
+struct shrinker;
+extern struct shrinker *set_shrinker(int, shrinker_t);
+extern void remove_shrinker(struct shrinker *shrinker);
+
+/*
  * If the mapping doesn't provide a set_page_dirty a_op, then
  * just fall through and assume that it wants buffer_heads.
  * FIXME: make the method unconditional.
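The shrinker contract declared above is easiest to see from the registering side. A sketch of how a cache would hook in, modeled on the dcache conversion in this patch (my_cache_count, my_cache_prune, and the module hooks are hypothetical names, not part of the patch):

```c
#include <linux/mm.h>
#include <linux/module.h>

/* Hypothetical ageable cache: an object count and a pruning helper. */
extern int my_cache_count;
extern void my_cache_prune(int nr);	/* frees up to nr objects */

/*
 * nr_to_scan == 0 is the VM's size query, so answer it cheaply; the
 * __GFP_FS test mirrors the deadlock avoidance in shrink_dcache_memory().
 */
static int my_cache_shrink(int nr_to_scan, unsigned int gfp_mask)
{
	if (nr_to_scan && (gfp_mask & __GFP_FS))
		my_cache_prune(nr_to_scan);
	return my_cache_count;
}

static struct shrinker *my_shrinker;

static int __init my_cache_init(void)
{
	my_shrinker = set_shrinker(DEFAULT_SEEKS, my_cache_shrink);
	return my_shrinker ? 0 : -ENOMEM;
}

static void __exit my_cache_exit(void)
{
	remove_shrinker(my_shrinker);	/* must precede freeing the cache */
}

module_init(my_cache_init);
module_exit(my_cache_exit);
```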
diff --git a/init/main.c b/init/main.c
index f69c298b9a6f..c6023edc03f3 100644
--- a/init/main.c
+++ b/init/main.c
@@ -24,7 +24,6 @@
 #include <linux/smp_lock.h>
 #include <linux/blk.h>
 #include <linux/hdreg.h>
-#include <linux/iobuf.h>
 #include <linux/bootmem.h>
 #include <linux/tty.h>
 #include <linux/percpu.h>
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index 9beb67e2a999..4b3e40b10a76 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -33,7 +33,6 @@
 #include <linux/swap.h>
 #include <linux/ctype.h>
 #include <linux/file.h>
-#include <linux/iobuf.h>
 #include <linux/console.h>
 #include <linux/poll.h>
 #include <linux/mmzone.h>
@@ -103,6 +102,8 @@
 EXPORT_SYMBOL(kmem_cache_shrink);
 EXPORT_SYMBOL(kmem_cache_alloc);
 EXPORT_SYMBOL(kmem_cache_free);
 EXPORT_SYMBOL(kmem_cache_size);
+EXPORT_SYMBOL(set_shrinker);
+EXPORT_SYMBOL(remove_shrinker);
 EXPORT_SYMBOL(kmalloc);
 EXPORT_SYMBOL(kfree);
 EXPORT_SYMBOL(vfree);
@@ -246,7 +247,6 @@
 EXPORT_SYMBOL(dput);
 EXPORT_SYMBOL(have_submounts);
 EXPORT_SYMBOL(d_find_alias);
 EXPORT_SYMBOL(d_prune_aliases);
-EXPORT_SYMBOL(prune_dcache);
 EXPORT_SYMBOL(shrink_dcache_sb);
 EXPORT_SYMBOL(shrink_dcache_parent);
 EXPORT_SYMBOL(shrink_dcache_anon);
@@ -438,18 +438,6 @@
 EXPORT_SYMBOL(__br_write_lock);
 EXPORT_SYMBOL(__br_write_unlock);
 #endif

-/* Kiobufs */
-EXPORT_SYMBOL(alloc_kiovec);
-EXPORT_SYMBOL(free_kiovec);
-EXPORT_SYMBOL(expand_kiobuf);
-
-EXPORT_SYMBOL(map_user_kiobuf);
-EXPORT_SYMBOL(unmap_kiobuf);
-EXPORT_SYMBOL(lock_kiovec);
-EXPORT_SYMBOL(unlock_kiovec);
-EXPORT_SYMBOL(brw_kiovec);
-EXPORT_SYMBOL(kiobuf_wait_for_io);
-
 #ifdef HAVE_DISABLE_HLT
 EXPORT_SYMBOL(disable_hlt);
 EXPORT_SYMBOL(enable_hlt);
diff --git a/mm/filemap.c b/mm/filemap.c
index 4c25b92352c0..b2fbb1cbf90b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -20,7 +20,6 @@
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/uio.h>
-#include <linux/iobuf.h>
 #include <linux/hash.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
diff --git a/mm/memory.c b/mm/memory.c
index 1c8d8af264f9..70403c0cb902 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -40,7 +40,6 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
-#include <linux/iobuf.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/vcache.h>
@@ -504,7 +503,7 @@ out:
 /*
  * Given a physical address, is there a useful struct page pointing to
  * it?  This may become more complex in the future if we start dealing
- * with IO-aperture pages in kiobufs.
+ * with IO-aperture pages for direct-IO.
  */

 static inline struct page *get_page_map(struct page *page)
@@ -589,224 +588,6 @@ out:
 	return i;
 }

-/*
- * Force in an entire range of pages from the current process's user VA,
- * and pin them in physical memory.
- */
-#define dprintk(x...)
-
-int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
-{
-	int pgcount, err;
-	struct mm_struct *	mm;
-
-	/* Make sure the iobuf is not already mapped somewhere. */
-	if (iobuf->nr_pages)
-		return -EINVAL;
-
-	mm = current->mm;
-	dprintk ("map_user_kiobuf: begin\n");
-
-	pgcount = (va + len + PAGE_SIZE - 1)/PAGE_SIZE - va/PAGE_SIZE;
-	/* mapping 0 bytes is not permitted */
-	if (!pgcount) BUG();
-	err = expand_kiobuf(iobuf, pgcount);
-	if (err)
-		return err;
-
-	iobuf->locked = 0;
-	iobuf->offset = va & (PAGE_SIZE-1);
-	iobuf->length = len;
-
-	/* Try to fault in all of the necessary pages */
-	down_read(&mm->mmap_sem);
-	/* rw==READ means read from disk, write into memory area */
-	err = get_user_pages(current, mm, va, pgcount,
-			(rw==READ), 0, iobuf->maplist, NULL);
-	up_read(&mm->mmap_sem);
-	if (err < 0) {
-		unmap_kiobuf(iobuf);
-		dprintk ("map_user_kiobuf: end %d\n", err);
-		return err;
-	}
-	iobuf->nr_pages = err;
-	while (pgcount--) {
-		/* FIXME: flush superflous for rw==READ,
-		 * probably wrong function for rw==WRITE
-		 */
-		flush_dcache_page(iobuf->maplist[pgcount]);
-	}
-	dprintk ("map_user_kiobuf: end OK\n");
-	return 0;
-}
-
-/*
- * Mark all of the pages in a kiobuf as dirty
- *
- * We need to be able to deal with short reads from disk: if an IO error
- * occurs, the number of bytes read into memory may be less than the
- * size of the kiobuf, so we have to stop marking pages dirty once the
- * requested byte count has been reached.
- */
-
-void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes)
-{
-	int index, offset, remaining;
-	struct page *page;
-
-	index = iobuf->offset >> PAGE_SHIFT;
-	offset = iobuf->offset & ~PAGE_MASK;
-	remaining = bytes;
-	if (remaining > iobuf->length)
-		remaining = iobuf->length;
-
-	while (remaining > 0 && index < iobuf->nr_pages) {
-		page = iobuf->maplist[index];
-
-		if (!PageReserved(page))
-			set_page_dirty(page);
-
-		remaining -= (PAGE_SIZE - offset);
-		offset = 0;
-		index++;
-	}
-}
-
-/*
- * Unmap all of the pages referenced by a kiobuf.  We release the pages,
- * and unlock them if they were locked.
- */
-
-void unmap_kiobuf (struct kiobuf *iobuf)
-{
-	int i;
-	struct page *map;
-
-	for (i = 0; i < iobuf->nr_pages; i++) {
-		map = iobuf->maplist[i];
-		if (map) {
-			if (iobuf->locked)
-				unlock_page(map);
-			/* FIXME: cache flush missing for rw==READ
-			 * FIXME: call the correct reference counting function
-			 */
-			page_cache_release(map);
-		}
-	}
-
-	iobuf->nr_pages = 0;
-	iobuf->locked = 0;
-}
-
-
-/*
- * Lock down all of the pages of a kiovec for IO.
- *
- * If any page is mapped twice in the kiovec, we return the error -EINVAL.
- *
- * The optional wait parameter causes the lock call to block until all
- * pages can be locked if set.  If wait==0, the lock operation is
- * aborted if any locked pages are found and -EAGAIN is returned.
- */
-
-int lock_kiovec(int nr, struct kiobuf *iovec[], int wait)
-{
-	struct kiobuf *iobuf;
-	int i, j;
-	struct page *page, **ppage;
-	int doublepage = 0;
-	int repeat = 0;
-
- repeat:
-
-	for (i = 0; i < nr; i++) {
-		iobuf = iovec[i];
-
-		if (iobuf->locked)
-			continue;
-
-		ppage = iobuf->maplist;
-		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
-			page = *ppage;
-			if (!page)
-				continue;
-
-			if (TestSetPageLocked(page)) {
-				while (j--) {
-					struct page *tmp = *--ppage;
-					if (tmp)
-						unlock_page(tmp);
-				}
-				goto retry;
-			}
-		}
-		iobuf->locked = 1;
-	}
-
-	return 0;
-
- retry:
-
-	/*
-	 * We couldn't lock one of the pages.  Undo the locking so far,
-	 * wait on the page we got to, and try again.
-	 */
-
-	unlock_kiovec(nr, iovec);
-	if (!wait)
-		return -EAGAIN;
-
-	/*
-	 * Did the release also unlock the page we got stuck on?
-	 */
-	if (!PageLocked(page)) {
-		/*
-		 * If so, we may well have the page mapped twice
-		 * in the IO address range.  Bad news.  Of
-		 * course, it _might_ just be a coincidence,
-		 * but if it happens more than once, chances
-		 * are we have a double-mapped page.
-		 */
-		if (++doublepage >= 3)
-			return -EINVAL;
-
-		/* Try again...  */
-		wait_on_page_locked(page);
-	}
-
-	if (++repeat < 16)
-		goto repeat;
-	return -EAGAIN;
-}
-
-/*
- * Unlock all of the pages of a kiovec after IO.
- */
-
-int unlock_kiovec(int nr, struct kiobuf *iovec[])
-{
-	struct kiobuf *iobuf;
-	int i, j;
-	struct page *page, **ppage;
-
-	for (i = 0; i < nr; i++) {
-		iobuf = iovec[i];
-
-		if (!iobuf->locked)
-			continue;
-		iobuf->locked = 0;
-
-		ppage = iobuf->maplist;
-		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
-			page = *ppage;
-			if (!page)
-				continue;
-			unlock_page(page);
-		}
-	}
-	return 0;
-}
-
 static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
                                      unsigned long size, pgprot_t prot)
 {
diff --git a/mm/msync.c b/mm/msync.c
index 7559fb30a062..3674d92253d5 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -137,6 +137,9 @@ static int msync_interval(struct vm_area_struct * vma,
 	int ret = 0;
 	struct file * file = vma->vm_file;

+	if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
+		return -EBUSY;
+
 	if (file && (vma->vm_flags & VM_SHARED)) {
 		ret = filemap_sync(vma, start, end-start, flags);
@@ -173,6 +176,8 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 		goto out;
 	if (start & ~PAGE_MASK)
 		goto out;
+	if ((flags & MS_ASYNC) && (flags & MS_SYNC))
+		goto out;
 	error = -ENOMEM;
 	len = (len + ~PAGE_MASK) & PAGE_MASK;
 	end = start + len;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d5af91e50bc8..5845586fb6bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1147,6 +1147,8 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
 	if (!ps)
 		return ERR_PTR(-ENOMEM);
 	get_full_page_state(ps);
+	ps->pgpgin /= 2;		/* sectors -> kbytes */
+	ps->pgpgout /= 2;
 	return (unsigned long *)ps + *pos;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0086407047f6..31856732ed7b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -77,9 +77,94 @@ static long total_memory;
 #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
 #endif

-#ifndef CONFIG_QUOTA
-#define shrink_dqcache_memory(ratio, gfp_mask) do { } while (0)
-#endif
+/*
+ * The list of shrinker callbacks used by the VM to apply pressure to
+ * ageable caches.
+ */
+struct shrinker {
+	shrinker_t		shrinker;
+	struct list_head	list;
+	int			seeks;	/* seeks to recreate an obj */
+	int			nr;	/* objs pending delete */
+};
+
+static LIST_HEAD(shrinker_list);
+static DECLARE_MUTEX(shrinker_sem);
+
+/*
+ * Add a shrinker callback to be called from the vm
+ */
+struct shrinker *set_shrinker(int seeks, shrinker_t theshrinker)
+{
+	struct shrinker *shrinker;
+
+	shrinker = kmalloc(sizeof(*shrinker), GFP_KERNEL);
+	if (shrinker) {
+		shrinker->shrinker = theshrinker;
+		shrinker->seeks = seeks;
+		shrinker->nr = 0;
+		down(&shrinker_sem);
+		list_add(&shrinker->list, &shrinker_list);
+		up(&shrinker_sem);
+	}
+	return shrinker;
+}
+
+/*
+ * Remove one
+ */
+void remove_shrinker(struct shrinker *shrinker)
+{
+	down(&shrinker_sem);
+	list_del(&shrinker->list);
+	up(&shrinker_sem);
+	kfree(shrinker);
+}
+
+#define SHRINK_BATCH 32
+/*
+ * Call the shrink functions to age shrinkable caches
+ *
+ * Here we assume it costs one seek to replace a lru page and that it also
+ * takes a seek to recreate a cache object.  With this in mind we age equal
+ * percentages of the lru and ageable caches.  This should balance the seeks
+ * generated by these structures.
+ *
+ * If the vm encountered mapped pages on the LRU it increases the pressure on
+ * slab to avoid swapping.
+ *
+ * FIXME: do not do for zone highmem
+ */
+static int shrink_slab(int scanned, unsigned int gfp_mask)
+{
+	struct list_head *lh;
+	int pages;
+
+	if (down_trylock(&shrinker_sem))
+		return 0;
+
+	pages = nr_used_zone_pages();
+	list_for_each(lh, &shrinker_list) {
+		struct shrinker *shrinker;
+		int entries;
+		unsigned long delta;
+
+		shrinker = list_entry(lh, struct shrinker, list);
+		entries = (*shrinker->shrinker)(0, gfp_mask);
+		if (!entries)
+			continue;
+		delta = scanned * shrinker->seeks * entries;
+		shrinker->nr += delta / (pages + 1);
+		if (shrinker->nr > SHRINK_BATCH) {
+			int nr = shrinker->nr;
+
+			shrinker->nr = 0;
+			(*shrinker->shrinker)(nr, gfp_mask);
+		}
+	}
+	up(&shrinker_sem);
+	return 0;
+}

 /* Must be called with page's pte_chain_lock held. */
 static inline int page_mapping_inuse(struct page * page)
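To make the aging arithmetic in shrink_slab() concrete, here is one pass under assumed numbers (the figures are illustrative, not taken from the patch):

```c
/*
 * Assume pages = nr_used_zone_pages() = 160000, scanned = 64 LRU pages,
 * and a shrinker with seeks = DEFAULT_SEEKS (2) reporting entries = 70000.
 *
 *	delta = scanned * seeks * entries = 64 * 2 * 70000 = 8960000
 *	shrinker->nr += delta / (pages + 1)  ->  += 8960000 / 160001 ~= 56
 *
 * 56 exceeds SHRINK_BATCH (32), so the callback is invoked with
 * nr_to_scan = 56.  Smaller passes merely accumulate in shrinker->nr,
 * which keeps cache aging proportional to LRU scanning over time.
 */
```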
@@ -627,32 +712,6 @@ shrink_zone(struct zone *zone, int max_scan, unsigned int gfp_mask,
 }

 /*
- * FIXME: don't do this for ZONE_HIGHMEM
- */
-/*
- * Here we assume it costs one seek to replace a lru page and that it also
- * takes a seek to recreate a cache object.  With this in mind we age equal
- * percentages of the lru and ageable caches.  This should balance the seeks
- * generated by these structures.
- *
- * NOTE: for now I do this for all zones.  If we find this is too aggressive
- * on large boxes we may want to exclude ZONE_HIGHMEM.
- *
- * If we're encountering mapped pages on the LRU then increase the pressure on
- * slab to avoid swapping.
- */
-static void shrink_slab(int total_scanned, int gfp_mask)
-{
-	int shrink_ratio;
-	int pages = nr_used_zone_pages();
-
-	shrink_ratio = (pages / (total_scanned + 1)) + 1;
-	shrink_dcache_memory(shrink_ratio, gfp_mask);
-	shrink_icache_memory(shrink_ratio, gfp_mask);
-	shrink_dqcache_memory(shrink_ratio, gfp_mask);
-}
-
-/*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
  * request.
@@ -695,7 +754,7 @@ shrink_caches(struct zone *classzone, int priority, int *total_scanned,
 	}
 	return ret;
 }
-
+
 /*
  * This is the main entry point to direct page reclaim.
 *
