summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorAndrew Morton <akpm@zip.com.au>2002-07-14 03:24:40 -0700
committerLinus Torvalds <torvalds@home.transmeta.com>2002-07-14 03:24:40 -0700
commit42ec8bc1d0bd11f0ebcb05c4c4a02065c08f4e73 (patch)
treec08d69e317566c25880498c36b072a54c1927eff /mm
parent2dbd15029c00ec56983a240a98306e8ea4101baa (diff)
[PATCH] direct-to-BIO for O_DIRECT
Here's a patch which converts O_DIRECT to go direct-to-BIO, bypassing the kiovec layer. It's followed by a patch which converts the raw driver to use the O_DIRECT engine. CPU utilisation is about the same as the kiovec-based implementation. Read and write bandwidth are the same too, for 128k chunks. But with one megabyte chunks, this implementation is 20% faster at writing. I assume this is because the kiobuf-based implementation has to stop and wait for each 128k chunk, whereas this code streams the entire request, regardless of its size. This is with a single (oldish) scsi disk on aic7xxx. I'd expect the margin to widen on higher-end hardware which likes to have more requests in flight. Question is: what do we want to do with this sucker? These are the remaining users of kiovecs: drivers/md/lvm-snap.c drivers/media/video/video-buf.c drivers/mtd/devices/blkmtd.c drivers/scsi/sg.c the video and mtd drivers seem to be fairly easy to de-kiobufize. I'm aware of one proprietary driver which uses kiobufs. XFS uses kiobufs a little bit - just to map the pages. So with a bit of effort and maintainer-irritation, we can extract the kiobuf layer from the kernel.
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c64
1 files changed, 33 insertions, 31 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 5bbd6673d952..e755d89071eb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -414,7 +414,7 @@ static int invalidate_list_pages2(struct address_space * mapping,
* free the pages because they're mapped.
* @mapping: the address_space which pages we want to invalidate
*/
-void invalidate_inode_pages2(struct address_space * mapping)
+void invalidate_inode_pages2(struct address_space *mapping)
{
int unlocked;
@@ -1102,6 +1102,7 @@ no_cached_page:
UPDATE_ATIME(inode);
}
+#if 0
static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
{
ssize_t retval;
@@ -1182,6 +1183,7 @@ static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, si
out:
return retval;
}
+#endif
int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
@@ -1209,15 +1211,36 @@ int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long o
* This is the "read()" routine for all filesystems
* that can use the page cache directly.
*/
-ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
+ssize_t
+generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
{
ssize_t retval;
if ((ssize_t) count < 0)
return -EINVAL;
- if (filp->f_flags & O_DIRECT)
- goto o_direct;
+ if (filp->f_flags & O_DIRECT) {
+ loff_t pos = *ppos, size;
+ struct address_space *mapping;
+ struct inode *inode;
+
+ mapping = filp->f_dentry->d_inode->i_mapping;
+ inode = mapping->host;
+ retval = 0;
+ if (!count)
+ goto out; /* skip atime */
+ size = inode->i_size;
+ if (pos < size) {
+ if (pos + count > size)
+ count = size - pos;
+ retval = generic_file_direct_IO(READ, inode,
+ buf, pos, count);
+ if (retval > 0)
+ *ppos = pos + retval;
+ }
+ UPDATE_ATIME(filp->f_dentry->d_inode);
+ goto out;
+ }
retval = -EFAULT;
if (access_ok(VERIFY_WRITE, buf, count)) {
@@ -1230,36 +1253,14 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *
desc.count = count;
desc.buf = buf;
desc.error = 0;
- do_generic_file_read(filp, ppos, &desc, file_read_actor);
-
+ do_generic_file_read(filp,ppos,&desc,file_read_actor);
retval = desc.written;
if (!retval)
retval = desc.error;
}
}
- out:
+out:
return retval;
-
- o_direct:
- {
- loff_t pos = *ppos, size;
- struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
- struct inode *inode = mapping->host;
-
- retval = 0;
- if (!count)
- goto out; /* skip atime */
- size = inode->i_size;
- if (pos < size) {
- if (pos + count > size)
- count = size - pos;
- retval = generic_file_direct_IO(READ, filp, buf, count, pos);
- if (retval > 0)
- *ppos = pos + retval;
- }
- UPDATE_ATIME(filp->f_dentry->d_inode);
- goto out;
- }
}
static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
@@ -2186,8 +2187,8 @@ generic_file_write(struct file *file, const char *buf,
}
if (unlikely(file->f_flags & O_DIRECT)) {
- written = generic_file_direct_IO(WRITE, file,
- (char *) buf, count, pos);
+ written = generic_file_direct_IO(WRITE, inode,
+ (char *)buf, pos, count);
if (written > 0) {
loff_t end = pos + written;
if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
@@ -2195,7 +2196,8 @@ generic_file_write(struct file *file, const char *buf,
mark_inode_dirty(inode);
}
*ppos = end;
- invalidate_inode_pages2(mapping);
+ if (mapping->nrpages)
+ invalidate_inode_pages2(mapping);
}
/*
* Sync the fs metadata but not the minor inode changes and