diff options
| author | Andrew Morton <akpm@zip.com.au> | 2002-07-14 03:24:40 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@home.transmeta.com> | 2002-07-14 03:24:40 -0700 |
| commit | 42ec8bc1d0bd11f0ebcb05c4c4a02065c08f4e73 (patch) | |
| tree | c08d69e317566c25880498c36b072a54c1927eff /mm | |
| parent | 2dbd15029c00ec56983a240a98306e8ea4101baa (diff) | |
[PATCH] direct-to-BIO for O_DIRECT
Here's a patch which converts O_DIRECT to go direct-to-BIO, bypassing
the kiovec layer. It's followed by a patch which converts the raw
driver to use the O_DIRECT engine.
CPU utilisation is about the same as the kiovec-based implementation.
Read and write bandwidth are the same too, for 128k chunks. But with
one megabyte chunks, this implementation is 20% faster at writing.
I assume this is because the kiobuf-based implementation has to stop
and wait for each 128k chunk, whereas this code streams the entire
request, regardless of its size.
This is with a single (oldish) scsi disk on aic7xxx. I'd expect the
margin to widen on higher-end hardware which likes to have more
requests in flight.
Question is: what do we want to do with this sucker? These are the
remaining users of kiovecs:
drivers/md/lvm-snap.c
drivers/media/video/video-buf.c
drivers/mtd/devices/blkmtd.c
drivers/scsi/sg.c
The video and mtd drivers seem to be fairly easy to de-kiobufize.
I'm aware of one proprietary driver which uses kiobufs. XFS uses
kiobufs a little bit - just to map the pages.
So with a bit of effort and maintainer-irritation, we can extract
the kiobuf layer from the kernel.
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/filemap.c | 64 |
1 files changed, 33 insertions, 31 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 5bbd6673d952..e755d89071eb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -414,7 +414,7 @@ static int invalidate_list_pages2(struct address_space * mapping, * free the pages because they're mapped. * @mapping: the address_space which pages we want to invalidate */ -void invalidate_inode_pages2(struct address_space * mapping) +void invalidate_inode_pages2(struct address_space *mapping) { int unlocked; @@ -1102,6 +1102,7 @@ no_cached_page: UPDATE_ATIME(inode); } +#if 0 static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset) { ssize_t retval; @@ -1182,6 +1183,7 @@ static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, si out: return retval; } +#endif int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) { @@ -1209,15 +1211,36 @@ int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long o * This is the "read()" routine for all filesystems * that can use the page cache directly. 
*/ -ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos) +ssize_t +generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) { ssize_t retval; if ((ssize_t) count < 0) return -EINVAL; - if (filp->f_flags & O_DIRECT) - goto o_direct; + if (filp->f_flags & O_DIRECT) { + loff_t pos = *ppos, size; + struct address_space *mapping; + struct inode *inode; + + mapping = filp->f_dentry->d_inode->i_mapping; + inode = mapping->host; + retval = 0; + if (!count) + goto out; /* skip atime */ + size = inode->i_size; + if (pos < size) { + if (pos + count > size) + count = size - pos; + retval = generic_file_direct_IO(READ, inode, + buf, pos, count); + if (retval > 0) + *ppos = pos + retval; + } + UPDATE_ATIME(filp->f_dentry->d_inode); + goto out; + } retval = -EFAULT; if (access_ok(VERIFY_WRITE, buf, count)) { @@ -1230,36 +1253,14 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t * desc.count = count; desc.buf = buf; desc.error = 0; - do_generic_file_read(filp, ppos, &desc, file_read_actor); - + do_generic_file_read(filp,ppos,&desc,file_read_actor); retval = desc.written; if (!retval) retval = desc.error; } } - out: +out: return retval; - - o_direct: - { - loff_t pos = *ppos, size; - struct address_space *mapping = filp->f_dentry->d_inode->i_mapping; - struct inode *inode = mapping->host; - - retval = 0; - if (!count) - goto out; /* skip atime */ - size = inode->i_size; - if (pos < size) { - if (pos + count > size) - count = size - pos; - retval = generic_file_direct_IO(READ, filp, buf, count, pos); - if (retval > 0) - *ppos = pos + retval; - } - UPDATE_ATIME(filp->f_dentry->d_inode); - goto out; - } } static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) @@ -2186,8 +2187,8 @@ generic_file_write(struct file *file, const char *buf, } if (unlikely(file->f_flags & O_DIRECT)) { - written = generic_file_direct_IO(WRITE, file, - (char *) buf, 
count, pos); + written = generic_file_direct_IO(WRITE, inode, + (char *)buf, pos, count); if (written > 0) { loff_t end = pos + written; if (end > inode->i_size && !S_ISBLK(inode->i_mode)) { @@ -2195,7 +2196,8 @@ generic_file_write(struct file *file, const char *buf, mark_inode_dirty(inode); } *ppos = end; - invalidate_inode_pages2(mapping); + if (mapping->nrpages) + invalidate_inode_pages2(mapping); } /* * Sync the fs metadata but not the minor inode changes and |
