diff options
| author | Neil Brown <neilb@cse.unsw.edu.au> | 2002-10-30 00:24:57 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@penguin.transmeta.com> | 2002-10-30 00:24:57 -0800 |
| commit | a0e7d495df35797364092fedff52ec488ec702eb (patch) | |
| tree | 90bfcca9826f70037c01f818b33e9321a57fc46d /fs | |
| parent | 335c5fc746de61c7ef278eda451162e388b57d49 (diff) | |
[PATCH] kNFSd: Convert nfsd to use a list of pages instead of one big buffer
This means:
1/ We don't need an order-4 allocation for each nfsd that starts
2/ We don't need an order-4 allocation in skb_linearize when
we receive a 32K write request
3/ It will be easier to incorporate the zero-copy read changes
The pages are handed around using an xdr_buf (instead of svc_buf)
much like the NFS client so future crypto code can use the same
data structure for both client and server.
The code assumes that most requests and replies fit in a single page.
The exceptions are assumed to have some largish 'data' bit, and the
rest must fit in a single page.
The 'data' bits are file data, readdir data, and symlinks.
There must be only one 'data' bit per request.
This is all fine for nfs/nlm.
This isn't complete:
1/ NFSv4 hasn't been converted yet (it won't compile)
2/ NFSv3 allows symlinks upto 4096, but the code will only support
upto about 3800 at the moment
3/ readdir responses are limited to about 3800.
but I thought that patch was big enough, and the rest can come
later.
This patch introduces vfs_readv and vfs_writev as parallels to
vfs_read and vfs_write. This means there is a fair bit of
duplication in read_write.c that should probably be tidied up...
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/lockd/xdr.c | 19 | ||||
| -rw-r--r-- | fs/lockd/xdr4.c | 20 | ||||
| -rw-r--r-- | fs/nfsd/nfs3proc.c | 30 | ||||
| -rw-r--r-- | fs/nfsd/nfs3xdr.c | 86 | ||||
| -rw-r--r-- | fs/nfsd/nfscache.c | 42 | ||||
| -rw-r--r-- | fs/nfsd/nfsproc.c | 24 | ||||
| -rw-r--r-- | fs/nfsd/nfssvc.c | 8 | ||||
| -rw-r--r-- | fs/nfsd/nfsxdr.c | 86 | ||||
| -rw-r--r-- | fs/nfsd/vfs.c | 9 | ||||
| -rw-r--r-- | fs/read_write.c | 94 |
10 files changed, 263 insertions, 155 deletions
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 5e87dd2fa59f..3d604168ebf9 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -216,25 +216,6 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp) return p; } -/* - * Check buffer bounds after decoding arguments - */ -static inline int -xdr_argsize_check(struct svc_rqst *rqstp, u32 *p) -{ - struct svc_buf *buf = &rqstp->rq_argbuf; - - return p - buf->base <= buf->buflen; -} - -static inline int -xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) -{ - struct svc_buf *buf = &rqstp->rq_resbuf; - - buf->len = p - buf->base; - return (buf->len <= buf->buflen); -} /* * First, the server side XDR functions diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 479bcdf73c2d..1f11211cbeb2 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -223,26 +223,6 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp) /* - * Check buffer bounds after decoding arguments - */ -static int -xdr_argsize_check(struct svc_rqst *rqstp, u32 *p) -{ - struct svc_buf *buf = &rqstp->rq_argbuf; - - return p - buf->base <= buf->buflen; -} - -static int -xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) -{ - struct svc_buf *buf = &rqstp->rq_resbuf; - - buf->len = p - buf->base; - return (buf->len <= buf->buflen); -} - -/* * First, the server side XDR functions */ int diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 61be6186bacf..ede78ddd05ae 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -43,11 +43,11 @@ static int nfs3_ftypes[] = { /* * Reserve room in the send buffer */ -static void -svcbuf_reserve(struct svc_buf *buf, u32 **ptr, int *len, int nr) +static inline void +svcbuf_reserve(struct xdr_buf *buf, u32 **ptr, int *len, int nr) { - *ptr = buf->buf + nr; - *len = buf->buflen - buf->len - nr; + *ptr = (u32*)(buf->head[0].iov_base+buf->head[0].iov_len) + nr; + *len = ((PAGE_SIZE-buf->head[0].iov_len)>>2) - nr; } /* @@ -150,7 +150,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh)); /* Reserve room for status, post_op_attr, and path length */ - svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy, + svcbuf_reserve(&rqstp->rq_res, &path, &dummy, 1 + NFS3_POST_OP_ATTR_WORDS + 1); /* Read the symlink. */ @@ -167,8 +167,7 @@ static int nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, struct nfsd3_readres *resp) { - u32 * buffer; - int nfserr, avail; + int nfserr; dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", SVCFH_fmt(&argp->fh), @@ -179,18 +178,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) * + 1 (xdr opaque byte count) = 26 */ - svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail, - 1 + NFS3_POST_OP_ATTR_WORDS + 3); + resp->count = argp->count; - if ((avail << 2) < resp->count) - resp->count = avail << 2; + if (NFSSVC_MAXBLKSIZE < resp->count) + resp->count = NFSSVC_MAXBLKSIZE; - svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + argp->count +4); + svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); nfserr = nfsd_read(rqstp, &resp->fh, argp->offset, - (char *) buffer, + argp->vec, argp->vlen, &resp->count); if (nfserr == 0) { struct inode *inode = resp->fh.fh_dentry->d_inode; @@ -220,7 +218,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, resp->committed = argp->stable; nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, - argp->data, + argp->vec, argp->vlen, argp->len, &resp->committed); resp->count = argp->count; @@ -447,7 +445,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, argp->count, (u32) argp->cookie); /* Reserve buffer space for status, attributes and verifier */ - svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, + svcbuf_reserve(&rqstp->rq_res, &buffer, &count, 1 + NFS3_POST_OP_ATTR_WORDS + 2); /* Make sure we've room for the NULL ptr & eof flag, and shrink to @@ -490,7 +488,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, argp->count, (u32) argp->cookie); /* Reserve buffer space for status, attributes and verifier */ - svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, + svcbuf_reserve(&rqstp->rq_res, &buffer, &count, 1 + NFS3_POST_OP_ATTR_WORDS + 2); /* Make sure we've room for the NULL ptr & eof flag, and shrink to diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 9eeba9f3291d..963bf3c7bf1d 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -13,6 +13,7 @@ #include <linux/spinlock.h> #include <linux/dcache.h> #include <linux/namei.h> +#include <linux/mm.h> #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/svc.h> @@ -269,27 +270,6 @@ encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) return encode_post_op_attr(rqstp, p, fhp); } -/* - * Check buffer bounds after decoding arguments - */ -static inline int -xdr_argsize_check(struct svc_rqst *rqstp, u32 *p) -{ - struct svc_buf *buf = &rqstp->rq_argbuf; - - return p - buf->base <= buf->buflen; -} - -static inline int -xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) -{ - struct svc_buf *buf = &rqstp->rq_resbuf; - - buf->len = p - buf->base; - dprintk("nfsd: ressize_check p %p base %p len %d\n", - p, buf->base, buf->buflen); - return (buf->len <= buf->buflen); -} /* * XDR decode functions @@ -342,11 +322,29 @@ int nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, struct nfsd3_readargs *args) { + int len; + int v,pn; + if (!(p = decode_fh(p, &args->fh)) || !(p = xdr_decode_hyper(p, &args->offset))) return 0; - args->count = ntohl(*p++); + len = args->count = ntohl(*p++); + + if (len > NFSSVC_MAXBLKSIZE) + len = NFSSVC_MAXBLKSIZE; + + /* set up the iovec */ + v=0; + while (len > 0) { + pn = rqstp->rq_resused; + take_page(rqstp); + args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; + v++; + len -= PAGE_SIZE; + } + args->vlen = v; return xdr_argsize_check(rqstp, p); } @@ -354,17 +352,33 @@ int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, struct nfsd3_writeargs *args) { + int len, v; + if (!(p = decode_fh(p, &args->fh)) || !(p = xdr_decode_hyper(p, &args->offset))) return 0; args->count = ntohl(*p++); args->stable = ntohl(*p++); - args->len = ntohl(*p++); - args->data = (char *) p; - p += XDR_QUADLEN(args->len); + len = args->len = ntohl(*p++); + + args->vec[0].iov_base = (void*)p; + args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - + (((void*)p) - rqstp->rq_arg.head[0].iov_base); + + if (len > NFSSVC_MAXBLKSIZE) + len = NFSSVC_MAXBLKSIZE; + v= 0; + while (len > args->vec[v].iov_len) { + len -= args->vec[v].iov_len; + v++; + args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); + args->vec[v].iov_len = PAGE_SIZE; + } + args->vec[v].iov_len = len; + args->vlen = v+1; - return xdr_argsize_check(rqstp, p); + return args->count == args->len && args->vec[0].iov_len > 0; } int @@ -584,9 +598,23 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p, *p++ = htonl(resp->count); *p++ = htonl(resp->eof); *p++ = htonl(resp->count); /* xdr opaque count */ - p += XDR_QUADLEN(resp->count); - } - return xdr_ressize_check(rqstp, p); + xdr_ressize_check(rqstp, p); + /* now update rqstp->rq_res to reflect data aswell */ + rqstp->rq_res.page_base = 0; + rqstp->rq_res.page_len = resp->count; + if (resp->count & 3) { + /* need to page with tail */ + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); + } + rqstp->rq_res.len = + rqstp->rq_res.head[0].iov_len+ + rqstp->rq_res.page_len+ + rqstp->rq_res.tail[0].iov_len; + return 1; + } else + return xdr_ressize_check(rqstp, p); } /* WRITE */ diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index ab52b4b100f5..b1ae27ee05ba 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -41,7 +41,7 @@ static struct svc_cacherep * lru_tail; static struct svc_cacherep * nfscache; static int cache_disabled = 1; -static int nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data); +static int nfsd_cache_append(struct svc_rqst *rqstp, struct iovec *vec); /* * locking for the reply cache: @@ -107,7 +107,7 @@ nfsd_cache_shutdown(void) for (rp = lru_head; rp; rp = rp->c_lru_next) { if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) - kfree(rp->c_replbuf.buf); + kfree(rp->c_replvec.iov_base); } cache_disabled = 1; @@ -242,8 +242,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { - kfree(rp->c_replbuf.buf); - rp->c_replbuf.buf = NULL; + kfree(rp->c_replvec.iov_base); + rp->c_replvec.iov_base = NULL; } rp->c_type = RC_NOCACHE; out: @@ -272,11 +272,11 @@ found_entry: case RC_NOCACHE: break; case RC_REPLSTAT: - svc_putu32(&rqstp->rq_resbuf, rp->c_replstat); + svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat); rtn = RC_REPLY; break; case RC_REPLBUFF: - if (!nfsd_cache_append(rqstp, &rp->c_replbuf)) + if (!nfsd_cache_append(rqstp, &rp->c_replvec)) goto out; /* should not happen */ rtn = RC_REPLY; break; @@ -308,13 +308,14 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp) { struct svc_cacherep *rp; - struct svc_buf *resp = &rqstp->rq_resbuf, *cachp; + struct iovec *resv = &rqstp->rq_res.head[0], *cachv; int len; if (!(rp = rqstp->rq_cacherep) || cache_disabled) return; - len = resp->len - (statp - resp->base); + len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); + len >>= 2; /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { @@ -329,16 +330,16 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp) rp->c_replstat = *statp; break; case RC_REPLBUFF: - cachp = &rp->c_replbuf; - cachp->buf = (u32 *) kmalloc(len << 2, GFP_KERNEL); - if (!cachp->buf) { + cachv = &rp->c_replvec; + cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); + if (!cachv->iov_base) { spin_lock(&cache_lock); rp->c_state = RC_UNUSED; spin_unlock(&cache_lock); return; } - cachp->len = len; - memcpy(cachp->buf, statp, len << 2); + cachv->iov_len = len << 2; + memcpy(cachv->iov_base, statp, len << 2); break; } spin_lock(&cache_lock); @@ -353,19 +354,20 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp) /* * Copy cached reply to current reply buffer. Should always fit. + * FIXME as reply is in a page, we should just attach the page, and + * keep a refcount.... */ static int -nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data) +nfsd_cache_append(struct svc_rqst *rqstp, struct iovec *data) { - struct svc_buf *resp = &rqstp->rq_resbuf; + struct iovec *vec = &rqstp->rq_res.head[0]; - if (resp->len + data->len > resp->buflen) { + if (vec->iov_len + data->iov_len > PAGE_SIZE) { printk(KERN_WARNING "nfsd: cached reply too large (%d).\n", - data->len); + data->iov_len); return 0; } - memcpy(resp->buf, data->buf, data->len << 2); - resp->buf += data->len; - resp->len += data->len; + memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len); + vec->iov_len += data->iov_len; return 1; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 06c4326e469b..997400e1105a 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -30,11 +30,11 @@ typedef struct svc_buf svc_buf; #define NFSDDBG_FACILITY NFSDDBG_PROC -static void -svcbuf_reserve(struct svc_buf *buf, u32 **ptr, int *len, int nr) +static inline void +svcbuf_reserve(struct xdr_buf *buf, u32 **ptr, int *len, int nr) { - *ptr = buf->buf + nr; - *len = buf->buflen - buf->len - nr; + *ptr = (u32*)(buf->head[0].iov_base+buf->head[0].iov_len) + nr; + *len = ((PAGE_SIZE-buf->head[0].iov_len)>>2) - nr; } static int @@ -109,7 +109,7 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh)); /* Reserve room for status and path length */ - svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy, 2); + svcbuf_reserve(&rqstp->rq_res, &path, &dummy, 2); /* Read the symlink. */ resp->len = NFS_MAXPATHLEN; @@ -127,8 +127,7 @@ static int nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, struct nfsd_readres *resp) { - u32 * buffer; - int nfserr, avail; + int nfserr; dprintk("nfsd: READ %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -137,22 +136,21 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, /* Obtain buffer pointer for payload. 19 is 1 word for * status, 17 words for fattr, and 1 word for the byte count. */ - svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail, 19); - if ((avail << 2) < argp->count) { + if (NFSSVC_MAXBLKSIZE < argp->count) { printk(KERN_NOTICE "oversized read request from %08x:%d (%d bytes)\n", ntohl(rqstp->rq_addr.sin_addr.s_addr), ntohs(rqstp->rq_addr.sin_port), argp->count); - argp->count = avail << 2; + argp->count = NFSSVC_MAXBLKSIZE; } svc_reserve(rqstp, (19<<2) + argp->count + 4); resp->count = argp->count; nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, - (char *) buffer, + argp->vec, argp->vlen, &resp->count); return nfserr; @@ -175,7 +173,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, - argp->data, + argp->vec, argp->vlen, argp->len, &stable); return nfserr; @@ -478,7 +476,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp, argp->count, argp->cookie); /* Reserve buffer space for status */ - svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, 1); + svcbuf_reserve(&rqstp->rq_res, &buffer, &count, 1); /* Shrink to the client read size */ if (count > (argp->count >> 2)) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index a12f2c980865..da4271183ef7 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -277,7 +277,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) /* Decode arguments */ xdr = proc->pc_decode; - if (xdr && !xdr(rqstp, rqstp->rq_argbuf.buf, rqstp->rq_argp)) { + if (xdr && !xdr(rqstp, (u32*)rqstp->rq_arg.head[0].iov_base, + rqstp->rq_argp)) { dprintk("nfsd: failed to decode arguments!\n"); nfsd_cache_update(rqstp, RC_NOCACHE, NULL); *statp = rpc_garbage_args; @@ -293,14 +294,15 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) } if (rqstp->rq_proc != 0) - svc_putu32(&rqstp->rq_resbuf, nfserr); + svc_putu32(&rqstp->rq_res.head[0], nfserr); /* Encode result. * For NFSv2, additional info is never returned in case of an error. */ if (!(nfserr && rqstp->rq_vers == 2)) { xdr = proc->pc_encode; - if (xdr && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) { + if (xdr && !xdr(rqstp, (u32*)(rqstp->rq_res.head[0].iov_base+rqstp->rq_res.head[0].iov_len), + rqstp->rq_resp)) { /* Failed to encode result. Release cache entry */ dprintk("nfsd: failed to encode result!\n"); nfsd_cache_update(rqstp, RC_NOCACHE, NULL); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 15f1c7a16031..d0895793efb1 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -14,6 +14,7 @@ #include <linux/sunrpc/svc.h> #include <linux/nfsd/nfsd.h> #include <linux/nfsd/xdr.h> +#include <linux/mm.h> #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -176,27 +177,6 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) return p; } -/* - * Check buffer bounds after decoding arguments - */ -static inline int -xdr_argsize_check(struct svc_rqst *rqstp, u32 *p) -{ - struct svc_buf *buf = &rqstp->rq_argbuf; - - return p - buf->base <= buf->buflen; -} - -static inline int -xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) -{ - struct svc_buf *buf = &rqstp->rq_resbuf; - - buf->len = p - buf->base; - dprintk("nfsd: ressize_check p %p base %p len %d\n", - p, buf->base, buf->buflen); - return (buf->len <= buf->buflen); -} /* * XDR decode functions @@ -241,13 +221,31 @@ int nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readargs *args) { + int len; + int v,pn; if (!(p = decode_fh(p, &args->fh))) return 0; args->offset = ntohl(*p++); - args->count = ntohl(*p++); - args->totalsize = ntohl(*p++); + len = args->count = ntohl(*p++); + p++; /* totalcount - unused */ + + if (len > NFSSVC_MAXBLKSIZE) + len = NFSSVC_MAXBLKSIZE; + /* set up somewhere to store response. + * We take pages, put them on reslist and include in iovec + */ + v=0; + while (len > 0) { + pn=rqstp->rq_resused; + take_page(rqstp); + args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; + v++; + len -= PAGE_SIZE; + } + args->vlen = v; return xdr_argsize_check(rqstp, p); } @@ -255,17 +253,30 @@ int nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_writeargs *args) { + int len; + int v; if (!(p = decode_fh(p, &args->fh))) return 0; p++; /* beginoffset */ args->offset = ntohl(*p++); /* offset */ p++; /* totalcount */ - args->len = ntohl(*p++); - args->data = (char *) p; - p += XDR_QUADLEN(args->len); - - return xdr_argsize_check(rqstp, p); + len = args->len = ntohl(*p++); + args->vec[0].iov_base = (void*)p; + args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - + (((void*)p) - rqstp->rq_arg.head[0].iov_base); + if (len > NFSSVC_MAXBLKSIZE) + len = NFSSVC_MAXBLKSIZE; + v = 0; + while (len > args->vec[v].iov_len) { + len -= args->vec[v].iov_len; + v++; + args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); + args->vec[v].iov_len = PAGE_SIZE; + } + args->vec[v].iov_len = len; + args->vlen = v+1; + return args->vec[0].iov_len > 0; } int @@ -371,9 +382,22 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p, { p = encode_fattr(rqstp, p, &resp->fh); *p++ = htonl(resp->count); - p += XDR_QUADLEN(resp->count); - - return xdr_ressize_check(rqstp, p); + xdr_ressize_check(rqstp, p); + + /* now update rqstp->rq_res to reflect data aswell */ + rqstp->rq_res.page_base = 0; + rqstp->rq_res.page_len = resp->count; + if (resp->count & 3) { + /* need to pad with tail */ + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); + } + rqstp->rq_res.len = + rqstp->rq_res.head[0].iov_len+ + rqstp->rq_res.page_len+ + rqstp->rq_res.tail[0].iov_len; + return 1; } int diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index cb06e914d548..76ad1349e5b9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -577,7 +577,7 @@ found: */ int nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - char *buf, unsigned long *count) + struct iovec *vec, int vlen, unsigned long *count) { struct raparms *ra; mm_segment_t oldfs; @@ -603,7 +603,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, oldfs = get_fs(); set_fs(KERNEL_DS); - err = vfs_read(&file, buf, *count, &offset); + err = vfs_readv(&file, vec, vlen, *count, &offset); set_fs(oldfs); /* Write back readahead params */ @@ -629,7 +629,8 @@ out: */ int nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - char *buf, unsigned long cnt, int *stablep) + struct iovec *vec, int vlen, + unsigned long cnt, int *stablep) { struct svc_export *exp; struct file file; @@ -677,7 +678,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); - err = vfs_write(&file, buf, cnt, &offset); + err = vfs_writev(&file, vec, vlen, cnt, &offset); if (err >= 0) nfsdstats.io_write += cnt; set_fs(oldfs); diff --git a/fs/read_write.c b/fs/read_write.c index a8b23e6367ee..a773421cb6f7 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -207,6 +207,53 @@ ssize_t vfs_read(struct file *file, char *buf, size_t count, loff_t *pos) return ret; } +ssize_t vfs_readv(struct file *file, struct iovec *vec, int vlen, size_t count, loff_t *pos) +{ + struct inode *inode = file->f_dentry->d_inode; + ssize_t ret; + + if (!(file->f_mode & FMODE_READ)) + return -EBADF; + if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) + return -EINVAL; + + ret = locks_verify_area(FLOCK_VERIFY_READ, inode, file, *pos, count); + if (!ret) { + ret = security_ops->file_permission (file, MAY_READ); + if (!ret) { + if (file->f_op->readv) + ret = file->f_op->readv(file, vec, vlen, pos); + else { + /* do it by hand */ + struct iovec *vector = vec; + ret = 0; + while (vlen > 0) { + void * base = vector->iov_base; + size_t len = vector->iov_len; + ssize_t nr; + vector++; + vlen--; + if (file->f_op->read) + nr = file->f_op->read(file, base, len, pos); + else + nr = do_sync_read(file, base, len, pos); + if (nr < 0) { + if (!ret) ret = nr; + break; + } + ret += nr; + if (nr != len) + break; + } + } + if (ret > 0) + dnotify_parent(file->f_dentry, DN_ACCESS); + } + } + + return ret; +} + ssize_t do_sync_write(struct file *filp, const char *buf, size_t len, loff_t *ppos) { struct kiocb kiocb; @@ -247,6 +294,53 @@ ssize_t vfs_write(struct file *file, const char *buf, size_t count, loff_t *pos) return ret; } +ssize_t vfs_writev(struct file *file, const struct iovec *vec, int vlen, size_t count, loff_t *pos) +{ + struct inode *inode = file->f_dentry->d_inode; + ssize_t ret; + + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) + return -EINVAL; + + ret = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file, *pos, count); + if (!ret) { + ret = security_ops->file_permission (file, MAY_WRITE); + if (!ret) { + if (file->f_op->writev) + ret = file->f_op->writev(file, vec, vlen, pos); + else { + /* do it by hand */ + const struct iovec *vector = vec; + ret = 0; + while (vlen > 0) { + void * base = vector->iov_base; + size_t len = vector->iov_len; + ssize_t nr; + vector++; + vlen--; + if (file->f_op->write) + nr = file->f_op->write(file, base, len, pos); + else + nr = do_sync_write(file, base, len, pos); + if (nr < 0) { + if (!ret) ret = nr; + break; + } + ret += nr; + if (nr != len) + break; + } + } + if (ret > 0) + dnotify_parent(file->f_dentry, DN_MODIFY); + } + } + + return ret; +} + asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count) { struct file *file; |
