From 71419dc7e039a8953861df2a28fad639d12ae6b9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 13 Oct 2002 02:58:45 -0700 Subject: [PATCH] batched slab shrink and registration API From Ed Tomlinson, then mauled by yours truly. The current shrinking of the dentry, inode and dquot caches seems to work OK, but it is slightly CPU-inefficient: we call the shrinking functions many times, for tiny numbers of objects. So here, we just batch that up - shrinking happens at the same rate but we perform it in larger units of work. To do this, we need a way of knowing how many objects are currently in use by individual caches. slab does not actually track this information, but the existing shrinkable caches do have this on hand. So rather than adding the counters to slab, we require that the shrinker callback functions keep their own count - we query that via the callback. We add a simple registration API which is exported to modules. A subsystem may register its own callback function via set_shrinker(). set_shrinker() simply takes a function pointer. The function is called with int (*shrinker)(int nr_to_shrink, unsigned int gfp_mask); The shrinker callback must scan `nr_to_scan' objects and free all freeable scanned objects. Note: it doesn't have to *free* `nr_to_scan' objects. It need only scan that many. Which is a fairly pedantic detail, really. The shrinker callback must return the number of objects which are in its cache at the end of the scanning attempt. It will be called with nr_to_scan == 0 when we're just querying the cache size. The set_shrinker() registration API is passed a hint as to how many disk seeks a single cache object is worth. Everything uses "2" at present. I saw no need to add the traditional `here is my void *data' to the registration/callback. Because there is a one-to-one relationship between caches and their shrinkers. Various cleanups became possible: - shrink_icache_memory() is no longer exported to modules. 
- shrink_icache_memory() is now static to fs/inode.c - prune_icache() is now static to fs/inode.c, and made inline (single caller) - shrink_dcache_memory() is made static to fs/dcache.c - prune_dcache() is no longer exported to modules - prune_dcache() is made static to fs/dcache.c - shrink_dqcache_memory() is made static to fs/dquot.c - All the quota init code has been moved from fs/dcache.c into fs/dquot.c - All modifications to inodes_stat.nr_inodes are now inside inode_lock - the dispose_list one was racy. --- include/linux/dcache.h | 11 ----------- include/linux/mm.h | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 0abaaaa2c96d..71708edafce9 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -180,17 +180,6 @@ extern void shrink_dcache_parent(struct dentry *); extern void shrink_dcache_anon(struct list_head *); extern int d_invalidate(struct dentry *); -/* dcache memory management */ -extern int shrink_dcache_memory(int, unsigned int); -extern void prune_dcache(int); - -/* icache memory management (defined in linux/fs/inode.c) */ -extern int shrink_icache_memory(int, unsigned int); -extern void prune_icache(int); - -/* quota cache memory management (defined in linux/fs/dquot.c) */ -extern int shrink_dqcache_memory(int, unsigned int); - /* only used at mount-time */ extern struct dentry * d_alloc_root(struct inode *); diff --git a/include/linux/mm.h b/include/linux/mm.h index a5107b5043f7..a6c66cc418ee 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -391,6 +391,29 @@ extern int free_hugepages(struct vm_area_struct *); #endif +/* + * Prototype to add a shrinker callback for ageable caches. + * + * These functions are passed a count `nr_to_scan' and a gfpmask. They should + * scan `nr_to_scan' objects, attempting to free them. + * + * The callback must return the number of objects which remain in the cache. 
+ * + * The callback will be passed nr_to_scan == 0 when the VM is querying the + * cache size, so a fastpath for that case is appropriate. + */ +typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask); + +/* + * Add an aging callback. The int is the number of 'seeks' it takes + * to recreate one of the objects that these functions age. + */ + +#define DEFAULT_SEEKS 2 +struct shrinker; +extern struct shrinker *set_shrinker(int, shrinker_t); +extern void remove_shrinker(struct shrinker *shrinker); + /* * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. -- cgit v1.2.3 From 2dcb8ff9ea7bfdc161eec1eeb8f94c2ba5c3c8a8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 13 Oct 2002 02:59:10 -0700 Subject: [PATCH] remove kiobufs This patch from Christoph Hellwig removes the kiobuf/kiovec infrastructure. This affects three subsystems: video-buf.c: This patch includes an earlier diff from Gerd which converts video-buf.c to use get_user_pages() directly. Gerd has acked this patch. LVM1: Is now even more broken. drivers/mtd/devices/blkmtd.c: blkmtd is broken by this change. I contacted Simon Evans, who said "I had done a rewrite of blkmtd anyway and just need to convert it to BIO. Feel free to break it in the 2.5 tree, it will force me to finish my code." Neither EVMS nor LVM2 use kiobufs. The only remaining breakage of which I am aware is a proprietary MPEG2 streaming module. It could use get_user_pages(). 
--- arch/cris/drivers/examples/kiobuftest.c | 111 ---------------- drivers/media/video/bttv-risc.c | 1 - drivers/media/video/bttvp.h | 1 - drivers/media/video/video-buf.c | 109 ++++++++++------ drivers/media/video/video-buf.h | 8 +- fs/Makefile | 2 +- fs/aio.c | 7 +- fs/bio.c | 125 +----------------- fs/block_dev.c | 1 - fs/buffer.c | 60 --------- fs/fcntl.c | 1 - fs/file_table.c | 1 - fs/iobuf.c | 125 ------------------ fs/open.c | 1 - fs/xfs/linux/xfs_aops.c | 1 - fs/xfs/linux/xfs_ioctl.c | 2 +- include/linux/buffer_head.h | 1 - include/linux/iobuf.h | 88 ------------- init/main.c | 1 - kernel/ksyms.c | 13 -- mm/filemap.c | 1 - mm/memory.c | 221 +------------------------------- 22 files changed, 82 insertions(+), 799 deletions(-) delete mode 100644 arch/cris/drivers/examples/kiobuftest.c delete mode 100644 fs/iobuf.c delete mode 100644 include/linux/iobuf.h (limited to 'include/linux') diff --git a/arch/cris/drivers/examples/kiobuftest.c b/arch/cris/drivers/examples/kiobuftest.c deleted file mode 100644 index 784418f9c4d6..000000000000 --- a/arch/cris/drivers/examples/kiobuftest.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Example showing how to pin down a range of virtual pages from user-space - * to be able to do for example DMA directly into them. - * - * It is necessary because the pages the virtual pointers reference, might - * not exist in memory (could be mapped to the zero-page, filemapped etc) - * and DMA cannot trigger the MMU to force them in (and would have time - * contraints making it impossible to wait for it anyway). 
- * - * Author: Bjorn Wesen - * - * $Log: kiobuftest.c,v $ - * Revision 1.1.1.1 2001/12/17 13:59:27 bjornw - * Import of Linux 2.5.1 - * - * Revision 1.2 2001/02/27 13:52:50 bjornw - * malloc.h -> slab.h - * - * Revision 1.1 2001/01/19 15:57:49 bjornw - * Example of how to do direct HW -> user-mode DMA - * - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define KIOBUFTEST_MAJOR 124 /* in the local range, experimental */ - - -static ssize_t -kiobuf_read(struct file *filp, char *buf, size_t len, loff_t *ppos) -{ - - struct kiobuf *iobuf; - int res, i; - - /* Make a kiobuf that maps the entire length the reader has given - * us - */ - - res = alloc_kiovec(1, &iobuf); - if (res) - return res; - - if((res = map_user_kiobuf(READ, iobuf, (unsigned long)buf, len))) { - printk("map_user_kiobuf failed, return %d\n", res); - return res; - } - - /* At this point, the virtual area buf[0] -> buf[len-1] will - * have corresponding pages mapped in physical memory and locked - * until we unmap the kiobuf. They cannot be swapped out or moved - * around. - */ - - printk("nr_pages == %d\noffset == %d\nlength == %d\n", - iobuf->nr_pages, iobuf->offset, iobuf->length); - - for(i = 0; i < iobuf->nr_pages; i++) { - printk("page_add(maplist[%d]) == 0x%x\n", i, - page_address(iobuf->maplist[i])); - } - - /* This is the place to create the necessary scatter-gather vector - * for the DMA using the iobuf->maplist array and page_address - * (don't forget __pa if the DMA needs the actual physical DRAM address) - * and run it. 
- */ - - - - - /* Release the mapping and exit */ - - unmap_kiobuf(iobuf); /* The unlock_kiobuf is implicit here */ - - return len; -} - - -static struct file_operations kiobuf_fops = { - owner: THIS_MODULE, - read: kiobuf_read -}; - -static int __init -kiobuftest_init(void) -{ - int res; - - /* register char device */ - - res = register_chrdev(KIOBUFTEST_MAJOR, "kiobuftest", &kiobuf_fops); - if(res < 0) { - printk(KERN_ERR "kiobuftest: couldn't get a major number.\n"); - return res; - } - - printk("Initializing kiobuf-test device\n"); -} - -module_init(kiobuftest_init); diff --git a/drivers/media/video/bttv-risc.c b/drivers/media/video/bttv-risc.c index d63b5b48481c..ddb6f4328189 100644 --- a/drivers/media/video/bttv-risc.c +++ b/drivers/media/video/bttv-risc.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/bttvp.h b/drivers/media/video/bttvp.h index c58be937f95f..01443316cf39 100644 --- a/drivers/media/video/bttvp.h +++ b/drivers/media/video/bttvp.h @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/video/video-buf.c b/drivers/media/video/video-buf.c index d1c783401b29..03d3bbd1356a 100644 --- a/drivers/media/video/video-buf.c +++ b/drivers/media/video/video-buf.c @@ -18,8 +18,8 @@ #include #include -#include #include +#include #include #include #include @@ -65,32 +65,31 @@ videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages) return NULL; } -struct scatterlist* -videobuf_iobuf_to_sg(struct kiobuf *iobuf) +struct scatterlist * +videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset) { struct scatterlist *sglist; int i = 0; - - sglist = kmalloc(sizeof(struct scatterlist) * iobuf->nr_pages, - GFP_KERNEL); + + if (NULL == pages[0]) + return NULL; + sglist = kmalloc(sizeof(*sglist) * nr_pages, GFP_KERNEL); if (NULL == sglist) return NULL; - memset(sglist,0,sizeof(struct scatterlist) * iobuf->nr_pages); + memset(sglist, 0, 
sizeof(*sglist) * nr_pages); - if (NULL == iobuf->maplist[0]) - goto err; - if (PageHighMem(iobuf->maplist[0])) + if (PageHighMem(pages[0])) /* DMA to highmem pages might not work */ goto err; - sglist[0].page = iobuf->maplist[0]; - sglist[0].offset = iobuf->offset; - sglist[0].length = PAGE_SIZE - iobuf->offset; - for (i = 1; i < iobuf->nr_pages; i++) { - if (NULL == iobuf->maplist[i]) + sglist[0].page = pages[0]; + sglist[0].offset = offset; + sglist[0].length = PAGE_SIZE - offset; + for (i = 1; i < nr_pages; i++) { + if (NULL == pages[i]) goto err; - if (PageHighMem(iobuf->maplist[i])) + if (PageHighMem(pages[i])) goto err; - sglist[i].page = iobuf->maplist[i]; + sglist[i].page = pages[i]; sglist[i].length = PAGE_SIZE; } return sglist; @@ -100,6 +99,30 @@ videobuf_iobuf_to_sg(struct kiobuf *iobuf) return NULL; } +int videobuf_lock(struct page **pages, int nr_pages) +{ + int i; + + for (i = 0; i < nr_pages; i++) + if (TestSetPageLocked(pages[i])) + goto err; + return 0; + + err: + while (i > 0) + unlock_page(pages[--i]); + return -EINVAL; +} + +int videobuf_unlock(struct page **pages, int nr_pages) +{ + int i; + + for (i = 0; i < nr_pages; i++) + unlock_page(pages[i]); + return 0; +} + /* --------------------------------------------------------------------- */ int videobuf_dma_init_user(struct videobuf_dmabuf *dma, int direction, @@ -113,14 +136,21 @@ int videobuf_dma_init_user(struct videobuf_dmabuf *dma, int direction, case PCI_DMA_TODEVICE: rw = WRITE; break; default: BUG(); } - if (0 != (err = alloc_kiovec(1,&dma->iobuf))) - return err; - if (0 != (err = map_user_kiobuf(rw, dma->iobuf, data, size))) { - dprintk(1,"map_user_kiobuf: %d\n",err); - return err; - } - dma->nr_pages = dma->iobuf->nr_pages; - return 0; + + dma->offset = data & PAGE_MASK; + dma->nr_pages = ((((data+size) & ~PAGE_MASK) - + (data & ~PAGE_MASK)) >> PAGE_SHIFT) +1; + dma->pages = kmalloc(dma->nr_pages * sizeof(struct page*), + GFP_KERNEL); + if (NULL == dma->pages) + return -ENOMEM; + 
down_read(¤t->mm->mmap_sem); + err = get_user_pages(current,current->mm, + data, dma->nr_pages, + rw == READ, 0, /* don't force */ + dma->pages, NULL); + up_read(¤t->mm->mmap_sem); + return err; } int videobuf_dma_init_kernel(struct videobuf_dmabuf *dma, int direction, @@ -144,13 +174,15 @@ int videobuf_dma_pci_map(struct pci_dev *dev, struct videobuf_dmabuf *dma) if (0 == dma->nr_pages) BUG(); - if (dma->iobuf) { - if (0 != (err = lock_kiovec(1,&dma->iobuf,1))) { - dprintk(1,"lock_kiovec: %d\n",err); + if (dma->pages) { + if (0 != (err = videobuf_lock(dma->pages, dma->nr_pages))) { + dprintk(1,"videobuf_lock_pages: %d\n",err); return err; } - dma->sglist = videobuf_iobuf_to_sg(dma->iobuf); + dma->sglist = videobuf_pages_to_sg(dma->pages, dma->nr_pages, + dma->offset); } + if (dma->vmalloc) { dma->sglist = videobuf_vmalloc_to_sg (dma->vmalloc,dma->nr_pages); @@ -160,7 +192,7 @@ int videobuf_dma_pci_map(struct pci_dev *dev, struct videobuf_dmabuf *dma) return -ENOMEM; } dma->sglen = pci_map_sg(dev,dma->sglist,dma->nr_pages, - dma->direction); + dma->direction); return 0; } @@ -182,8 +214,8 @@ int videobuf_dma_pci_unmap(struct pci_dev *dev, struct videobuf_dmabuf *dma) kfree(dma->sglist); dma->sglist = NULL; dma->sglen = 0; - if (dma->iobuf) - unlock_kiovec(1,&dma->iobuf); + if (dma->pages) + videobuf_lock(dma->pages, dma->nr_pages); return 0; } @@ -192,11 +224,14 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma) if (dma->sglen) BUG(); - if (dma->iobuf) { - unmap_kiobuf(dma->iobuf); - free_kiovec(1,&dma->iobuf); - dma->iobuf = NULL; + if (dma->pages) { + int i; + for (i=0; i < dma->nr_pages; i++) + page_cache_release(dma->pages[i]); + kfree(dma->pages); + dma->pages = NULL; } + if (dma->vmalloc) { vfree(dma->vmalloc); dma->vmalloc = NULL; @@ -959,6 +994,7 @@ int videobuf_mmap_mapper(struct vm_area_struct *vma, map->q = q; vma->vm_ops = &videobuf_vm_ops; vma->vm_flags |= VM_DONTEXPAND; + vma->vm_flags &= ~VM_IO; /* using shared anonymous pages */ 
vma->vm_private_data = map; dprintk(1,"mmap %p: %08lx-%08lx pgoff %08lx bufs %d-%d\n", map,vma->vm_start,vma->vm_end,vma->vm_pgoff,first,last); @@ -972,7 +1008,6 @@ int videobuf_mmap_mapper(struct vm_area_struct *vma, /* --------------------------------------------------------------------- */ EXPORT_SYMBOL_GPL(videobuf_vmalloc_to_sg); -EXPORT_SYMBOL_GPL(videobuf_iobuf_to_sg); EXPORT_SYMBOL_GPL(videobuf_dma_init_user); EXPORT_SYMBOL_GPL(videobuf_dma_init_kernel); diff --git a/drivers/media/video/video-buf.h b/drivers/media/video/video-buf.h index 0e2c5860b953..3d8710848ca1 100644 --- a/drivers/media/video/video-buf.h +++ b/drivers/media/video/video-buf.h @@ -28,11 +28,12 @@ struct scatterlist* videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages); /* - * Return a scatterlist for a locked iobuf (NULL on errors). Memory + * Return a scatterlist for a an array of userpages (NULL on errors). Memory * for the scatterlist is allocated using kmalloc. The caller must * free the memory. */ -struct scatterlist* videobuf_iobuf_to_sg(struct kiobuf *iobuf); +struct scatterlist *videobuf_pages_to_sg(struct page **pages, int nr_pages, + int offset); /* --------------------------------------------------------------------- */ @@ -57,7 +58,8 @@ struct scatterlist* videobuf_iobuf_to_sg(struct kiobuf *iobuf); struct videobuf_dmabuf { /* for userland buffer */ - struct kiobuf *iobuf; + struct page **pages; + int offset; /* for kernel buffers */ void *vmalloc; diff --git a/fs/Makefile b/fs/Makefile index 7f349ff168ad..d902bdd8bda3 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ export-objs := open.o dcache.o buffer.o bio.o inode.o dquot.o mpage.o aio.o \ obj-y := open.o read_write.o devices.o file_table.o buffer.o \ bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \ namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ - dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ + dcache.o inode.o attr.o bad_inode.o file.o dnotify.o \ filesystems.o 
namespace.o seq_file.o xattr.o libfs.o \ fs-writeback.o mpage.o direct-io.o aio.o diff --git a/fs/aio.c b/fs/aio.c index e561c8b83a00..6b51c1316ab2 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -9,6 +9,7 @@ * See ../COPYING for licensing terms. */ #include +#include #include #include #include @@ -21,15 +22,9 @@ #include #include #include -#include -#include #include #include -#include #include -#include -#include -#include #include #include #include diff --git a/fs/bio.c b/fs/bio.c index 407f18c90a48..6c196406c90a 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -438,128 +438,6 @@ retry_segments: return 0; } -static int bio_end_io_kio(struct bio *bio, unsigned int bytes_done, int error) -{ - struct kiobuf *kio = (struct kiobuf *) bio->bi_private; - - if (bio->bi_size) - return 1; - - end_kio_request(kio, error); - bio_put(bio); - return 0; -} - -/** - * ll_rw_kio - submit a &struct kiobuf for I/O - * @rw: %READ or %WRITE - * @kio: the kiobuf to do I/O on - * @bdev: target device - * @sector: start location on disk - * - * Description: - * ll_rw_kio will map the page list inside the &struct kiobuf to - * &struct bio and queue them for I/O. The kiobuf given must describe - * a continous range of data, and must be fully prepared for I/O. - **/ -void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t sector) -{ - int i, offset, size, err, map_i, total_nr_pages, nr_pages; - struct bio *bio; - - err = 0; - if ((rw & WRITE) && bdev_read_only(bdev)) { - printk("ll_rw_bio: WRITE to ro device %s\n", bdevname(bdev)); - err = -EPERM; - goto out; - } - - if (!kio->nr_pages) { - err = -EINVAL; - goto out; - } - - /* - * maybe kio is bigger than the max we can easily map into a bio. - * if so, split it up in appropriately sized chunks. 
- */ - total_nr_pages = kio->nr_pages; - offset = kio->offset & ~PAGE_MASK; - size = kio->length; - - atomic_set(&kio->io_count, 1); - - map_i = 0; - -next_chunk: - nr_pages = BIO_MAX_PAGES; - if (nr_pages > total_nr_pages) - nr_pages = total_nr_pages; - - atomic_inc(&kio->io_count); - - /* - * allocate bio and do initial setup - */ - if ((bio = bio_alloc(GFP_NOIO, nr_pages)) == NULL) { - err = -ENOMEM; - goto out; - } - - bio->bi_sector = sector; - bio->bi_bdev = bdev; - bio->bi_idx = 0; - bio->bi_end_io = bio_end_io_kio; - bio->bi_private = kio; - - for (i = 0; i < nr_pages; i++, map_i++) { - int nbytes = PAGE_SIZE - offset; - - if (nbytes > size) - nbytes = size; - - BUG_ON(kio->maplist[map_i] == NULL); - - /* - * if we can't add this page to the bio, submit for i/o - * and alloc a new one if needed - */ - if (bio_add_page(bio, kio->maplist[map_i], nbytes, offset)) - break; - - /* - * kiobuf only has an offset into the first page - */ - offset = 0; - - sector += nbytes >> 9; - size -= nbytes; - total_nr_pages--; - kio->offset += nbytes; - } - - submit_bio(rw, bio); - - if (total_nr_pages) - goto next_chunk; - - if (size) { - printk("ll_rw_kio: size %d left (kio %d)\n", size, kio->length); - BUG(); - } - -out: - if (err) - kio->errno = err; - - /* - * final atomic_dec of io_count to match our initial setting of 1. - * I/O may or may not have completed at this point, final completion - * handler is only run on last decrement. 
- */ - end_kio_request(kio, !err); -} - /** * bio_endio - end I/O on a bio * @bio: bio @@ -662,7 +540,6 @@ module_init(init_bio); EXPORT_SYMBOL(bio_alloc); EXPORT_SYMBOL(bio_put); -EXPORT_SYMBOL(ll_rw_kio); EXPORT_SYMBOL(bio_endio); EXPORT_SYMBOL(bio_init); EXPORT_SYMBOL(bio_copy); diff --git a/fs/block_dev.c b/fs/block_dev.c index 3b95ff2d40a4..33fc669b7842 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/buffer.c b/fs/buffer.c index 30c0adeec762..d024b78c3e60 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -2300,65 +2299,6 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, return tmp.b_blocknr; } -/* - * Start I/O on a physical range of kernel memory, defined by a vector - * of kiobuf structs (much like a user-space iovec list). - * - * The kiobuf must already be locked for IO. IO is submitted - * asynchronously: you need to check page->locked and page->uptodate. - * - * It is up to the caller to make sure that there are enough blocks - * passed in to completely map the iobufs to disk. - */ -int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - struct block_device *bdev, sector_t b[], int size) -{ - int transferred; - int i; - int err; - struct kiobuf * iobuf; - - if (!nr) - return 0; - - /* - * First, do some alignment and validity checks - */ - for (i = 0; i < nr; i++) { - iobuf = iovec[i]; - if ((iobuf->offset & (size-1)) || (iobuf->length & (size-1))) - return -EINVAL; - if (!iobuf->nr_pages) - panic("brw_kiovec: iobuf not initialised"); - } - - /* - * OK to walk down the iovec doing page IO on each page we find. 
- */ - for (i = 0; i < nr; i++) { - iobuf = iovec[i]; - iobuf->errno = 0; - - ll_rw_kio(rw, iobuf, bdev, b[i] * (size >> 9)); - } - - /* - * now they are all submitted, wait for completion - */ - transferred = 0; - err = 0; - for (i = 0; i < nr; i++) { - iobuf = iovec[i]; - kiobuf_wait_for_io(iobuf); - if (iobuf->errno && !err) - err = iobuf->errno; - if (!err) - transferred += iobuf->length; - } - - return err ? err : transferred; -} - static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err) { struct buffer_head *bh = bio->bi_private; diff --git a/fs/fcntl.c b/fs/fcntl.c index 539711ef1061..c2fc83cdfed6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include diff --git a/fs/file_table.c b/fs/file_table.c index d6093fc0b1b5..fe6c048c2bab 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/fs/iobuf.c b/fs/iobuf.c deleted file mode 100644 index 62c44534c68a..000000000000 --- a/fs/iobuf.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * iobuf.c - * - * Keep track of the general-purpose IO-buffer structures used to track - * abstract kernel-space io buffers. 
- * - */ - -#include -#include - -int end_kio_request(struct kiobuf *kiobuf, int uptodate) -{ - int ret = 1; - - if ((!uptodate) && !kiobuf->errno) - kiobuf->errno = -EIO; - - if (atomic_dec_and_test(&kiobuf->io_count)) { - ret = 0; - if (kiobuf->end_io) - kiobuf->end_io(kiobuf); - wake_up(&kiobuf->wait_queue); - } - - return ret; -} - -static void kiobuf_init(struct kiobuf *iobuf) -{ - init_waitqueue_head(&iobuf->wait_queue); - atomic_set(&iobuf->io_count, 0); - iobuf->array_len = KIO_STATIC_PAGES; - iobuf->maplist = iobuf->map_array; - iobuf->nr_pages = 0; - iobuf->locked = 0; - iobuf->io_count.counter = 0; - iobuf->end_io = NULL; -} - -int alloc_kiovec(int nr, struct kiobuf **bufp) -{ - int i; - struct kiobuf *iobuf; - - for (i = 0; i < nr; i++) { - iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL); - if (!iobuf) { - free_kiovec(i, bufp); - return -ENOMEM; - } - kiobuf_init(iobuf); - bufp[i] = iobuf; - } - - return 0; -} - -void free_kiovec(int nr, struct kiobuf **bufp) -{ - int i; - struct kiobuf *iobuf; - - for (i = 0; i < nr; i++) { - iobuf = bufp[i]; - if (iobuf->locked) - unlock_kiovec(1, &iobuf); - if (iobuf->array_len > KIO_STATIC_PAGES) - kfree (iobuf->maplist); - kfree(bufp[i]); - } -} - -int expand_kiobuf(struct kiobuf *iobuf, int wanted) -{ - struct page ** maplist; - - if (iobuf->array_len >= wanted) - return 0; - - maplist = (struct page **) - kmalloc(wanted * sizeof(struct page **), GFP_KERNEL); - if (!maplist) - return -ENOMEM; - - /* Did it grow while we waited? 
*/ - if (iobuf->array_len >= wanted) { - kfree(maplist); - return 0; - } - - memcpy (maplist, iobuf->maplist, iobuf->array_len * sizeof(struct page **)); - - if (iobuf->array_len > KIO_STATIC_PAGES) - kfree (iobuf->maplist); - - iobuf->maplist = maplist; - iobuf->array_len = wanted; - return 0; -} - - -void kiobuf_wait_for_io(struct kiobuf *kiobuf) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - - if (atomic_read(&kiobuf->io_count) == 0) - return; - - add_wait_queue(&kiobuf->wait_queue, &wait); -repeat: - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (atomic_read(&kiobuf->io_count) != 0) { - blk_run_queues(); - schedule(); - if (atomic_read(&kiobuf->io_count) != 0) - goto repeat; - } - tsk->state = TASK_RUNNING; - remove_wait_queue(&kiobuf->wait_queue, &wait); -} - - - diff --git a/fs/open.c b/fs/open.c index 673d20cd1ee8..3e690b0cd50d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c index 184d173ee192..e749c3c3bbed 100644 --- a/fs/xfs/linux/xfs_aops.c +++ b/fs/xfs/linux/xfs_aops.c @@ -34,7 +34,6 @@ #include #include #include -#include STATIC int delalloc_convert(struct inode *, struct page *, int, int); diff --git a/fs/xfs/linux/xfs_ioctl.c b/fs/xfs/linux/xfs_ioctl.c index a2b5f0162ccd..5dbf4fd9debf 100644 --- a/fs/xfs/linux/xfs_ioctl.c +++ b/fs/xfs/linux/xfs_ioctl.c @@ -35,7 +35,6 @@ #include #include #include -#include extern int xfs_change_file_space(bhv_desc_t *, int, @@ -605,6 +604,7 @@ xfs_ioctl( * it is set to the file system block size to * avoid having to do block zeroing on short writes. */ +#define KIO_MAX_ATOMIC_IO 512 /* FIXME: what do we really want here? 
*/ da.d_maxiosz = XFS_FSB_TO_B(mp, XFS_B_TO_FSBT(mp, KIO_MAX_ATOMIC_IO << 10)); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 71732e1216fc..0760d97cd6f9 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { #define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512) struct page; -struct kiobuf; struct buffer_head; struct address_space; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); diff --git a/include/linux/iobuf.h b/include/linux/iobuf.h deleted file mode 100644 index fb147b5c48a7..000000000000 --- a/include/linux/iobuf.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * iobuf.h - * - * Defines the structures used to track abstract kernel-space io buffers. - * - */ - -#ifndef __LINUX_IOBUF_H -#define __LINUX_IOBUF_H - -#include -#include -#include -#include - -/* - * The kiobuf structure describes a physical set of pages reserved - * locked for IO. The reference counts on each page will have been - * incremented, and the flags field will indicate whether or not we have - * pre-locked all of the pages for IO. - * - * kiobufs may be passed in arrays to form a kiovec, but we must - * preserve the property that no page is present more than once over the - * entire iovec. - */ - -#define KIO_MAX_ATOMIC_IO 512 /* in kb */ -#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1) -#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2) - -/* The main kiobuf struct */ - -struct kiobuf -{ - int nr_pages; /* Pages actually referenced */ - int array_len; /* Space in the allocated lists */ - int offset; /* Offset to start of valid data */ - int length; /* Number of valid bytes of data */ - - /* Keep separate track of the physical addresses and page - * structs involved. If we do IO to a memory-mapped device - * region, there won't necessarily be page structs defined for - * every address. 
*/ - - struct page ** maplist; - - unsigned int locked : 1; /* If set, pages has been locked */ - - /* Always embed enough struct pages for atomic IO */ - struct page * map_array[KIO_STATIC_PAGES]; - sector_t blocks[KIO_MAX_SECTORS]; - - /* Dynamic state for IO completion: */ - atomic_t io_count; /* IOs still in progress */ - int errno; /* Status of completed IO */ - void (*end_io) (struct kiobuf *); /* Completion callback */ - wait_queue_head_t wait_queue; -}; - - -/* mm/memory.c */ - -int map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len); -void unmap_kiobuf(struct kiobuf *iobuf); -int lock_kiovec(int nr, struct kiobuf *iovec[], int wait); -int unlock_kiovec(int nr, struct kiobuf *iovec[]); -void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes); - -/* fs/iobuf.c */ - -int end_kio_request(struct kiobuf *, int); -void simple_wakeup_kiobuf(struct kiobuf *); -int alloc_kiovec(int nr, struct kiobuf **); -void free_kiovec(int nr, struct kiobuf **); -int expand_kiobuf(struct kiobuf *, int); -void kiobuf_wait_for_io(struct kiobuf *); -extern int alloc_kiobuf_bhs(struct kiobuf *); -extern void free_kiobuf_bhs(struct kiobuf *); - -/* fs/buffer.c */ - -int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - struct block_device *bdev, sector_t [], int size); - -/* fs/bio.c */ -void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t block); - -#endif /* __LINUX_IOBUF_H */ diff --git a/init/main.c b/init/main.c index f69c298b9a6f..c6023edc03f3 100644 --- a/init/main.c +++ b/init/main.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/ksyms.c b/kernel/ksyms.c index bd0a43fcf7f4..4b3e40b10a76 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -439,18 +438,6 @@ EXPORT_SYMBOL(__br_write_lock); EXPORT_SYMBOL(__br_write_unlock); #endif -/* Kiobufs */ -EXPORT_SYMBOL(alloc_kiovec); 
-EXPORT_SYMBOL(free_kiovec); -EXPORT_SYMBOL(expand_kiobuf); - -EXPORT_SYMBOL(map_user_kiobuf); -EXPORT_SYMBOL(unmap_kiobuf); -EXPORT_SYMBOL(lock_kiovec); -EXPORT_SYMBOL(unlock_kiovec); -EXPORT_SYMBOL(brw_kiovec); -EXPORT_SYMBOL(kiobuf_wait_for_io); - #ifdef HAVE_DISABLE_HLT EXPORT_SYMBOL(disable_hlt); EXPORT_SYMBOL(enable_hlt); diff --git a/mm/filemap.c b/mm/filemap.c index 4c25b92352c0..b2fbb1cbf90b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index 1c8d8af264f9..70403c0cb902 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -504,7 +503,7 @@ out: /* * Given a physical address, is there a useful struct page pointing to * it? This may become more complex in the future if we start dealing - * with IO-aperture pages in kiobufs. + * with IO-aperture pages for direct-IO. */ static inline struct page *get_page_map(struct page *page) @@ -589,224 +588,6 @@ out: return i; } -/* - * Force in an entire range of pages from the current process's user VA, - * and pin them in physical memory. - */ -#define dprintk(x...) - -int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) -{ - int pgcount, err; - struct mm_struct * mm; - - /* Make sure the iobuf is not already mapped somewhere. 
*/ - if (iobuf->nr_pages) - return -EINVAL; - - mm = current->mm; - dprintk ("map_user_kiobuf: begin\n"); - - pgcount = (va + len + PAGE_SIZE - 1)/PAGE_SIZE - va/PAGE_SIZE; - /* mapping 0 bytes is not permitted */ - if (!pgcount) BUG(); - err = expand_kiobuf(iobuf, pgcount); - if (err) - return err; - - iobuf->locked = 0; - iobuf->offset = va & (PAGE_SIZE-1); - iobuf->length = len; - - /* Try to fault in all of the necessary pages */ - down_read(&mm->mmap_sem); - /* rw==READ means read from disk, write into memory area */ - err = get_user_pages(current, mm, va, pgcount, - (rw==READ), 0, iobuf->maplist, NULL); - up_read(&mm->mmap_sem); - if (err < 0) { - unmap_kiobuf(iobuf); - dprintk ("map_user_kiobuf: end %d\n", err); - return err; - } - iobuf->nr_pages = err; - while (pgcount--) { - /* FIXME: flush superflous for rw==READ, - * probably wrong function for rw==WRITE - */ - flush_dcache_page(iobuf->maplist[pgcount]); - } - dprintk ("map_user_kiobuf: end OK\n"); - return 0; -} - -/* - * Mark all of the pages in a kiobuf as dirty - * - * We need to be able to deal with short reads from disk: if an IO error - * occurs, the number of bytes read into memory may be less than the - * size of the kiobuf, so we have to stop marking pages dirty once the - * requested byte count has been reached. - */ - -void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes) -{ - int index, offset, remaining; - struct page *page; - - index = iobuf->offset >> PAGE_SHIFT; - offset = iobuf->offset & ~PAGE_MASK; - remaining = bytes; - if (remaining > iobuf->length) - remaining = iobuf->length; - - while (remaining > 0 && index < iobuf->nr_pages) { - page = iobuf->maplist[index]; - - if (!PageReserved(page)) - set_page_dirty(page); - - remaining -= (PAGE_SIZE - offset); - offset = 0; - index++; - } -} - -/* - * Unmap all of the pages referenced by a kiobuf. We release the pages, - * and unlock them if they were locked. 
- */ - -void unmap_kiobuf (struct kiobuf *iobuf) -{ - int i; - struct page *map; - - for (i = 0; i < iobuf->nr_pages; i++) { - map = iobuf->maplist[i]; - if (map) { - if (iobuf->locked) - unlock_page(map); - /* FIXME: cache flush missing for rw==READ - * FIXME: call the correct reference counting function - */ - page_cache_release(map); - } - } - - iobuf->nr_pages = 0; - iobuf->locked = 0; -} - - -/* - * Lock down all of the pages of a kiovec for IO. - * - * If any page is mapped twice in the kiovec, we return the error -EINVAL. - * - * The optional wait parameter causes the lock call to block until all - * pages can be locked if set. If wait==0, the lock operation is - * aborted if any locked pages are found and -EAGAIN is returned. - */ - -int lock_kiovec(int nr, struct kiobuf *iovec[], int wait) -{ - struct kiobuf *iobuf; - int i, j; - struct page *page, **ppage; - int doublepage = 0; - int repeat = 0; - - repeat: - - for (i = 0; i < nr; i++) { - iobuf = iovec[i]; - - if (iobuf->locked) - continue; - - ppage = iobuf->maplist; - for (j = 0; j < iobuf->nr_pages; ppage++, j++) { - page = *ppage; - if (!page) - continue; - - if (TestSetPageLocked(page)) { - while (j--) { - struct page *tmp = *--ppage; - if (tmp) - unlock_page(tmp); - } - goto retry; - } - } - iobuf->locked = 1; - } - - return 0; - - retry: - - /* - * We couldn't lock one of the pages. Undo the locking so far, - * wait on the page we got to, and try again. - */ - - unlock_kiovec(nr, iovec); - if (!wait) - return -EAGAIN; - - /* - * Did the release also unlock the page we got stuck on? - */ - if (!PageLocked(page)) { - /* - * If so, we may well have the page mapped twice - * in the IO address range. Bad news. Of - * course, it _might_ just be a coincidence, - * but if it happens more than once, chances - * are we have a double-mapped page. - */ - if (++doublepage >= 3) - return -EINVAL; - - /* Try again... 
*/ - wait_on_page_locked(page); - } - - if (++repeat < 16) - goto repeat; - return -EAGAIN; -} - -/* - * Unlock all of the pages of a kiovec after IO. - */ - -int unlock_kiovec(int nr, struct kiobuf *iovec[]) -{ - struct kiobuf *iobuf; - int i, j; - struct page *page, **ppage; - - for (i = 0; i < nr; i++) { - iobuf = iovec[i]; - - if (!iobuf->locked) - continue; - iobuf->locked = 0; - - ppage = iobuf->maplist; - for (j = 0; j < iobuf->nr_pages; ppage++, j++) { - page = *ppage; - if (!page) - continue; - unlock_page(page); - } - } - return 0; -} - static inline void zeromap_pte_range(pte_t * pte, unsigned long address, unsigned long size, pgprot_t prot) { -- cgit v1.2.3