diff options
| author | Andrew Morton <akpm@zip.com.au> | 2002-07-18 21:10:30 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@home.transmeta.com> | 2002-07-18 21:10:30 -0700 |
| commit | e3339bee8dcfe3fe6a521b9700d78bbe79c3c6d2 (patch) | |
| tree | 2fee0e16b23b68698f1c30d813419cc3d7c07952 | |
| parent | 4504a57ef1e0454c1679ccc8ff9c7050f4385ff0 (diff) | |
[PATCH] direct_io mopup
Some cleanup from the surprise direct-to-bio for O_DIRECT merge.
- Remove bits and pieces from the kiobuf implementation
- Replace the waitqueue in struct dio with just a task_struct pointer
and use wake_up_process. (Ben).
- Only take mmap_sem around the individual calls to get_user_pages().
(It pins the vmas, yes?)
- Remove some debug code.
- Fix JFS.
| -rw-r--r-- | fs/buffer.c | 49 | ||||
| -rw-r--r-- | fs/direct-io.c | 31 | ||||
| -rw-r--r-- | fs/fcntl.c | 17 | ||||
| -rw-r--r-- | fs/file_table.c | 3 | ||||
| -rw-r--r-- | fs/jfs/inode.c | 7 | ||||
| -rw-r--r-- | fs/open.c | 11 | ||||
| -rw-r--r-- | include/linux/fs.h | 5 | ||||
| -rw-r--r-- | mm/filemap.c | 83 |
8 files changed, 12 insertions, 194 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index 27300f9a6ee0..dfb38f4a6f44 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2309,55 +2309,6 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, return tmp.b_blocknr; } -#if 0 -int generic_direct_IO(int rw, struct inode *inode, - struct kiobuf *iobuf, unsigned long blocknr, - int blocksize, get_block_t *get_block) -{ - int i, nr_blocks, retval = 0; - sector_t *blocks = iobuf->blocks; - struct block_device *bdev = NULL; - - nr_blocks = iobuf->length / blocksize; - /* build the blocklist */ - for (i = 0; i < nr_blocks; i++, blocknr++) { - struct buffer_head bh; - - bh.b_state = 0; - bh.b_size = blocksize; - - retval = get_block(inode, blocknr, &bh, rw & 1); - if (retval) - goto out; - - if (rw == READ) { - if (buffer_new(&bh)) - BUG(); - if (!buffer_mapped(&bh)) { - /* there was an hole in the filesystem */ - blocks[i] = -1UL; - continue; - } - } else { - if (buffer_new(&bh)) - unmap_underlying_metadata(bh.b_bdev, - bh.b_blocknr); - if (!buffer_mapped(&bh)) - BUG(); - } - blocks[i] = bh.b_blocknr; - bdev = bh.b_bdev; - } - - /* This does not understand multi-device filesystems currently */ - if (bdev) - retval = brw_kiovec(rw, 1, &iobuf, bdev, blocks, blocksize); - - out: - return retval; -} -#endif - /* * Start I/O on a physical range of kernel memory, defined by a vector * of kiobuf structs (much like a user-space iovec list). diff --git a/fs/direct-io.c b/fs/direct-io.c index 32c2b1097540..9a40e00ec452 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1,5 +1,5 @@ /* - * mm/direct-io.c + * fs/direct-io.c * * Copyright (C) 2002, Linus Torvalds. 
* @@ -61,7 +61,7 @@ struct dio { atomic_t bio_count; spinlock_t bio_list_lock; struct bio *bio_list; /* singly linked via bi_private */ - wait_queue_head_t wait_q; + struct task_struct *waiter; }; /* @@ -81,6 +81,7 @@ static int dio_refill_pages(struct dio *dio) int nr_pages; nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES); + down_read(&current->mm->mmap_sem); ret = get_user_pages( current, /* Task for fault acounting */ current->mm, /* whose pages? */ @@ -90,6 +91,7 @@ static int dio_refill_pages(struct dio *dio) 0, /* force (?) */ &dio->pages[0], NULL); /* vmas */ + up_read(&current->mm->mmap_sem); if (ret >= 0) { dio->curr_user_address += ret * PAGE_SIZE; @@ -139,7 +141,7 @@ static void dio_bio_end_io(struct bio *bio) bio->bi_private = dio->bio_list; dio->bio_list = bio; spin_unlock_irqrestore(&dio->bio_list_lock, flags); - wake_up(&dio->wait_q); + wake_up_process(dio->waiter); } static int @@ -193,13 +195,11 @@ static void dio_cleanup(struct dio *dio) */ static struct bio *dio_await_one(struct dio *dio) { - DECLARE_WAITQUEUE(wait, current); unsigned long flags; struct bio *bio; spin_lock_irqsave(&dio->bio_list_lock, flags); while (dio->bio_list == NULL) { - add_wait_queue(&dio->wait_q, &wait); set_current_state(TASK_UNINTERRUPTIBLE); if (dio->bio_list == NULL) { spin_unlock_irqrestore(&dio->bio_list_lock, flags); @@ -208,7 +208,6 @@ static struct bio *dio_await_one(struct dio *dio) spin_lock_irqsave(&dio->bio_list_lock, flags); } set_current_state(TASK_RUNNING); - remove_wait_queue(&dio->wait_q, &wait); } bio = dio->bio_list; dio->bio_list = bio->bi_private; @@ -224,23 +223,17 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec; int page_no; - int ret = 0; for (page_no = 0; page_no < bio->bi_vcnt; page_no++) { struct page *page = bvec[page_no].bv_page; - if (!uptodate) { - if (ret == 0) - ret = -EIO; - } - if (dio->rw == READ) set_page_dirty(page); 
page_cache_release(page); } atomic_dec(&dio->bio_count); bio_put(bio); - return ret; + return uptodate ? 0 : -EIO; } /* @@ -265,7 +258,7 @@ static int dio_await_completion(struct dio *dio) * to keep the memory consumption sane we periodically reap any completed BIOs * during the BIO generation phase. * - * This also helps to limis the peak amount of pinned userspace memory. + * This also helps to limit the peak amount of pinned userspace memory. */ static int dio_bio_reap(struct dio *dio) { @@ -388,15 +381,13 @@ out: return ret; } -struct dio *g_dio; - int generic_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count, get_block_t get_block) { const unsigned blocksize_mask = (1 << inode->i_blkbits) - 1; const unsigned long user_addr = (unsigned long)buf; - int ret = 0; + int ret; int ret2; struct dio dio; size_t bytes; @@ -407,8 +398,6 @@ generic_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, goto out; } - g_dio = &dio; - /* BIO submission state */ dio.bio = NULL; dio.bvec = NULL; @@ -444,11 +433,9 @@ generic_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, atomic_set(&dio.bio_count, 0); spin_lock_init(&dio.bio_list_lock); dio.bio_list = NULL; - init_waitqueue_head(&dio.wait_q); + dio.waiter = current; - down_read(&current->mm->mmap_sem); ret = do_direct_IO(&dio); - up_read(&current->mm->mmap_sem); if (dio.bio) dio_bio_submit(&dio); diff --git a/fs/fcntl.c b/fs/fcntl.c index 9d9df23dceb9..98392f716b2a 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -248,23 +248,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg) if (!inode->i_mapping || !inode->i_mapping->a_ops || !inode->i_mapping->a_ops->direct_IO) return -EINVAL; - - /* - * alloc_kiovec() can sleep and we are only serialized by - * the big kernel lock here, so abuse the i_sem to serialize - * this case too. 
We of course wouldn't need to go deep down - * to the inode layer, we could stay at the file layer, but - * we don't want to pay for the memory of a semaphore in each - * file structure too and we use the inode semaphore that we just - * pay for anyways. - */ - error = 0; - down(&inode->i_sem); - if (!filp->f_iobuf) - error = alloc_kiovec(1, &filp->f_iobuf); - up(&inode->i_sem); - if (error < 0) - return error; } /* required for strict SunOS emulation */ diff --git a/fs/file_table.c b/fs/file_table.c index 5734437cd750..cdaf93dd6716 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -115,9 +115,6 @@ void __fput(struct file * file) locks_remove_flock(file); - if (file->f_iobuf) - free_kiovec(1, &file->f_iobuf); - if (file->f_op && file->f_op->release) file->f_op->release(inode, file); fops_put(file->f_op); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 7e8fbd68824d..b3b221726155 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -293,11 +293,10 @@ static int jfs_bmap(struct address_space *mapping, long block) return generic_block_bmap(mapping, block, jfs_get_block); } -static int jfs_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, - unsigned long blocknr, int blocksize) +static int jfs_direct_IO(int rw, struct inode *inode, char *buf, + loff_t offset, size_t count) { - return generic_direct_IO(rw, inode, iobuf, blocknr, - blocksize, jfs_get_block); + return generic_direct_IO(rw, inode, buf, offset, count, jfs_get_block); } struct address_space_operations jfs_aops = { diff --git a/fs/open.c b/fs/open.c index 5ad6630562cf..c3e593544089 100644 --- a/fs/open.c +++ b/fs/open.c @@ -647,15 +647,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) f->f_op = fops_get(inode->i_fop); file_move(f, &inode->i_sb->s_files); - /* preallocate kiobuf for O_DIRECT */ - f->f_iobuf = NULL; - f->f_iobuf_lock = 0; - if (f->f_flags & O_DIRECT) { - error = alloc_kiovec(1, &f->f_iobuf); - if (error) - goto cleanup_all; - } - if (f->f_op 
&& f->f_op->open) { error = f->f_op->open(inode,f); if (error) @@ -675,8 +666,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) return f; cleanup_all: - if (f->f_iobuf) - free_kiovec(1, &f->f_iobuf); fops_put(f->f_op); if (f->f_mode & FMODE_WRITE) put_write_access(inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ac85b8e28a3..84413138923e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -274,7 +274,6 @@ struct iattr { */ struct page; struct address_space; -struct kiobuf; struct address_space_operations { int (*writepage)(struct page *); @@ -493,10 +492,6 @@ struct file { /* needed for tty driver, and maybe others */ void *private_data; - - /* preallocated helper kiobuf to speedup O_DIRECT */ - struct kiobuf *f_iobuf; - long f_iobuf_lock; }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); diff --git a/mm/filemap.c b/mm/filemap.c index b11dcb824da5..a3691b828cc0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1102,89 +1102,6 @@ no_cached_page: UPDATE_ATIME(inode); } -#if 0 -static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset) -{ - ssize_t retval; - int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress; - struct kiobuf * iobuf; - struct address_space * mapping = filp->f_dentry->d_inode->i_mapping; - struct inode * inode = mapping->host; - - new_iobuf = 0; - iobuf = filp->f_iobuf; - if (test_and_set_bit(0, &filp->f_iobuf_lock)) { - /* - * A parallel read/write is using the preallocated iobuf - * so just run slow and allocate a new one. 
- */ - retval = alloc_kiovec(1, &iobuf); - if (retval) - goto out; - new_iobuf = 1; - } - - blocksize = 1 << inode->i_blkbits; - blocksize_bits = inode->i_blkbits; - blocksize_mask = blocksize - 1; - chunk_size = KIO_MAX_ATOMIC_IO << 10; - - retval = -EINVAL; - if ((offset & blocksize_mask) || (count & blocksize_mask)) - goto out_free; - - /* - * Flush to disk exclusively the _data_, metadata must remain - * completly asynchronous or performance will go to /dev/null. - */ - retval = filemap_fdatawait(mapping); - if (retval == 0) - retval = filemap_fdatawrite(mapping); - if (retval == 0) - retval = filemap_fdatawait(mapping); - if (retval < 0) - goto out_free; - - progress = retval = 0; - while (count > 0) { - iosize = count; - if (iosize > chunk_size) - iosize = chunk_size; - - retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize); - if (retval) - break; - - retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize); - - if (rw == READ && retval > 0) - mark_dirty_kiobuf(iobuf, retval); - - if (retval >= 0) { - count -= retval; - buf += retval; - progress += retval; - } - - unmap_kiobuf(iobuf); - - if (retval != iosize) - break; - } - - if (progress) - retval = progress; - - out_free: - if (!new_iobuf) - clear_bit(0, &filp->f_iobuf_lock); - else - free_kiovec(1, &iobuf); - out: - return retval; -} -#endif - int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) { char *kaddr; |
