author    Neil Brown <neilb@cse.unsw.edu.au>    2002-10-30 00:24:57 -0800
committer Linus Torvalds <torvalds@penguin.transmeta.com>    2002-10-30 00:24:57 -0800
commit    a0e7d495df35797364092fedff52ec488ec702eb (patch)
tree      90bfcca9826f70037c01f818b33e9321a57fc46d /include/linux/sunrpc/svc.h
parent    335c5fc746de61c7ef278eda451162e388b57d49 (diff)
[PATCH] kNFSd: Convert nfsd to use a list of pages instead of one big buffer
This means:

1/ We don't need an order-4 allocation for each nfsd that starts
2/ We don't need an order-4 allocation in skb_linearize when we
   receive a 32K write request
3/ It will be easier to incorporate the zero-copy read changes

The pages are handed around using an xdr_buf (instead of svc_buf),
much like the NFS client, so future crypto code can use the same data
structure for both client and server (a sketch of the layout follows
below).

The code assumes that most requests and replies fit in a single page.
The exceptions are assumed to have some largish 'data' bit, and the
rest must fit in a single page. The 'data' bits are file data, readdir
data, and symlinks. There must be only one 'data' bit per request.
This is all fine for nfs/nlm.

This isn't complete:

1/ NFSv4 hasn't been converted yet (it won't compile)
2/ NFSv3 allows symlinks up to 4096 bytes, but the code will only
   support up to about 3800 at the moment
3/ readdir responses are limited to about 3800 bytes

but I thought that patch was big enough, and the rest can come later.

This patch introduces vfs_readv and vfs_writev as parallels to
vfs_read and vfs_write. This means there is a fair bit of duplication
in read_write.c that should probably be tidied up...
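As a minimal userspace sketch of the head/pages/tail layout described
above: the struct and function names (xdr_buf_sketch, xdr_buf_len) are
hypothetical stand-ins, not the kernel's definitions, and char pointers
stand in for struct page references.

    #include <sys/uio.h>

    /* Hypothetical, simplified stand-in for the kernel's xdr_buf. */
    struct xdr_buf_sketch {
            struct iovec head;      /* RPC header; lives in the first page */
            char **pages;           /* the one 'data' bit: file data, readdir, symlink */
            unsigned int page_len;  /* bytes of data carried in those pages */
            struct iovec tail;      /* trailing words, also in the first page */
    };

    /* Total message length = header + data pages + tail, matching the
     * "header, some data pages, and possibly a tail" shape above. */
    static inline size_t xdr_buf_len(const struct xdr_buf_sketch *buf)
    {
            return buf->head.iov_len + buf->page_len + buf->tail.iov_len;
    }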
Diffstat (limited to 'include/linux/sunrpc/svc.h')
-rw-r--r--  include/linux/sunrpc/svc.h  110
1 file changed, 77 insertions(+), 33 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 9ad879d9bea7..24464d66411a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -48,43 +48,49 @@ struct svc_serv {
* This is used to determine the max number of pages nfsd is
* willing to return in a single READ operation.
*/
-#define RPCSVC_MAXPAYLOAD 16384u
+#define RPCSVC_MAXPAYLOAD (64*1024u)
/*
- * Buffer to store RPC requests or replies in.
- * Each server thread has one of these beasts.
+ * RPC requests and replies are stored in one or more pages.
+ * We maintain an array of pages for each server thread.
+ * Requests are copied into these pages as they arrive. Remaining
+ * pages are available to write the reply into.
*
- * Area points to the allocated memory chunk currently owned by the
- * buffer. Base points to the buffer containing the request, which is
- * different from area when directly reading from an sk_buff. buf is
- * the current read/write position while processing an RPC request.
+ * Currently pages are all re-used by the same server. Later we
+ * will use ->sendpage to transmit pages with reduced copying. In
+ * that case we will need to give away the page and allocate new ones.
+ * In preparation for this, we explicitly move pages off the recv
+ * list onto the transmit list, and back.
*
- * The array of iovecs can hold additional data that the server process
- * may not want to copy into the RPC reply buffer, but pass to the
- * network sendmsg routines directly. The prime candidate for this
- * will of course be NFS READ operations, but one might also want to
- * do something about READLINK and READDIR. It might be worthwhile
- * to implement some generic readdir cache in the VFS layer...
+ * We use xdr_buf for holding responses as it fits well with NFS
+ * read responses (that have a header, and some data pages, and possibly
+ * a tail) and means we can share some client side routines.
*
- * On the receiving end of the RPC server, the iovec may be used to hold
- * the list of IP fragments once we get to process fragmented UDP
- * datagrams directly.
+ * The xdr_buf.head iovec always points to the first page in the rq_*pages
+ * list. The xdr_buf.pages pointer points to the second page on that
+ * list. xdr_buf.tail points to the end of the first page.
+ * This assumes that the non-page part of an rpc reply will fit
+ * in a page - NFSd ensures this. lockd also has no trouble.
*/
-#define RPCSVC_MAXIOV ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 1)
-struct svc_buf {
- u32 * area; /* allocated memory */
- u32 * base; /* base of RPC datagram */
- int buflen; /* total length of buffer */
- u32 * buf; /* read/write pointer */
- int len; /* current end of buffer */
-
- /* iovec for zero-copy NFS READs */
- struct iovec iov[RPCSVC_MAXIOV];
- int nriov;
-};
-#define svc_getu32(argp, val) { (val) = *(argp)->buf++; (argp)->len--; }
-#define svc_putu32(resp, val) { *(resp)->buf++ = (val); (resp)->len++; }
+#define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 1)
+
+static inline u32 svc_getu32(struct iovec *iov)
+{
+ u32 val, *vp;
+ vp = iov->iov_base;
+ val = *vp++;
+ iov->iov_base = (void*)vp;
+ iov->iov_len -= sizeof(u32);
+ return val;
+}
+static inline void svc_putu32(struct iovec *iov, u32 val)
+{
+ u32 *vp = iov->iov_base + iov->iov_len;
+ *vp = val;
+ iov->iov_len += sizeof(u32);
+}
+
/*
* The context of a single thread, including the request currently being
* processed.
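A self-contained illustration of how the new iovec-based helpers are
meant to be driven: the two inline bodies below are userspace copies of
the patch's svc_getu32()/svc_putu32() (with a char* cast replacing the
kernel's void* arithmetic, for portability); main() and the buffer are
made up for the example.

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/uio.h>

    typedef uint32_t u32;

    /* Pull one word off the front of the argument iovec. */
    static inline u32 svc_getu32(struct iovec *iov)
    {
            u32 val, *vp = iov->iov_base;
            val = *vp++;
            iov->iov_base = (void *)vp;
            iov->iov_len -= sizeof(u32);
            return val;
    }

    /* Append one word at the current end of the reply iovec. */
    static inline void svc_putu32(struct iovec *iov, u32 val)
    {
            u32 *vp = (u32 *)((char *)iov->iov_base + iov->iov_len);
            *vp = val;
            iov->iov_len += sizeof(u32);
    }

    int main(void)
    {
            u32 buf[4] = { 7, 100003, 0, 0 };    /* xid, prog, room for reply */
            struct iovec arg = { buf, 2 * sizeof(u32) };
            struct iovec res = { buf + 2, 0 };   /* reply grows from length 0 */

            u32 xid  = svc_getu32(&arg);         /* consumes from the front */
            u32 prog = svc_getu32(&arg);
            svc_putu32(&res, xid);               /* appends at the end */
            printf("xid=%u prog=%u reply_len=%zu\n", xid, prog, res.iov_len);
            return 0;
    }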
@@ -102,9 +108,15 @@ struct svc_rqst {
struct svc_cred rq_cred; /* auth info */
struct sk_buff * rq_skbuff; /* fast recv inet buffer */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
- struct svc_buf rq_defbuf; /* default buffer */
- struct svc_buf rq_argbuf; /* argument buffer */
- struct svc_buf rq_resbuf; /* result buffer */
+
+ struct xdr_buf rq_arg;
+ struct xdr_buf rq_res;
+ struct page * rq_argpages[RPCSVC_MAXPAGES];
+ struct page * rq_respages[RPCSVC_MAXPAGES];
+ short rq_argused; /* pages used for argument */
+ short rq_arghi; /* pages available in argument page list */
+ short rq_resused; /* pages used for result */
+
u32 rq_xid; /* transmission id */
u32 rq_prog; /* program number */
u32 rq_vers; /* program version */
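The three counters added above work as high/low watermarks over one
page list: arguments consume pages from the bottom (rq_argused) while
replies take spare pages from the top (rq_arghi counts down). A hedged
sketch of how a thread's list might be stocked before a request; the
names pages_sketch and init_pages_sketch are hypothetical, void
pointers stand in for struct page, and malloc() for the kernel page
allocator.

    #include <stdlib.h>

    #define PAGE_SIZE       4096
    #define RPCSVC_MAXPAGES ((64 * 1024 + PAGE_SIZE - 1) / PAGE_SIZE + 1)

    /* Hypothetical model of the new per-thread fields. */
    struct pages_sketch {
            void *argpages[RPCSVC_MAXPAGES];
            void *respages[RPCSVC_MAXPAGES];
            short argused;  /* next argument page to consume, from the bottom */
            short arghi;    /* one past the last page still available */
            short resused;  /* pages moved over to the reply so far */
    };

    /* Stock the list before a request: every page starts on the argument
     * side and the reply side is empty, so spares can be pulled from the
     * top as the reply grows. */
    static int init_pages_sketch(struct pages_sketch *rq)
    {
            rq->argused = rq->resused = 0;
            for (rq->arghi = 0; rq->arghi < RPCSVC_MAXPAGES; rq->arghi++) {
                    rq->argpages[rq->arghi] = malloc(PAGE_SIZE);
                    if (!rq->argpages[rq->arghi])
                            return -1;
            }
            return 0;
    }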
@@ -136,6 +148,38 @@ struct svc_rqst {
wait_queue_head_t rq_wait; /* synchronization */
};
+/*
+ * Check buffer bounds after decoding arguments
+ */
+static inline int
+xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
+{
+ char *cp = (char *)p;
+ struct iovec *vec = &rqstp->rq_arg.head[0];
+ return cp - (char*)vec->iov_base <= vec->iov_len;
+}
+
+static inline int
+xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
+{
+ struct iovec *vec = &rqstp->rq_res.head[0];
+ char *cp = (char*)p;
+
+ vec->iov_len = cp - (char*)vec->iov_base;
+ rqstp->rq_res.len = vec->iov_len;
+
+ return vec->iov_len <= PAGE_SIZE;
+}
+
+static inline int take_page(struct svc_rqst *rqstp)
+{
+ if (rqstp->rq_arghi <= rqstp->rq_argused)
+ return -ENOMEM;
+ rqstp->rq_respages[rqstp->rq_resused++] =
+ rqstp->rq_argpages[--rqstp->rq_arghi];
+ return 0;
+}
+
struct svc_deferred_req {
struct svc_serv *serv;
u32 prot; /* protocol (UDP or TCP) */
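To show how the new bounds checks are intended to be used, here is a
hedged sketch of a trivial procedure driving them. svc_rqst_sketch,
argsize_ok, ressize_ok, and proc_null_sketch are hypothetical userspace
stand-ins; only the check logic is copied from the inlines above.

    #include <stdint.h>
    #include <sys/uio.h>

    typedef uint32_t u32;
    #define PAGE_SIZE 4096

    /* Pared-down stand-in for svc_rqst: just the two xdr_bufs' heads. */
    struct svc_rqst_sketch {
            struct { struct iovec head[1]; unsigned int len; } rq_arg, rq_res;
    };

    /* 'p' is where decoding stopped; it must not have run past the
     * data actually received (same test as xdr_argsize_check). */
    static inline int argsize_ok(struct svc_rqst_sketch *rqstp, u32 *p)
    {
            struct iovec *vec = &rqstp->rq_arg.head[0];
            return (size_t)((char *)p - (char *)vec->iov_base) <= vec->iov_len;
    }

    /* 'p' is where encoding stopped; fix the reply length and insist the
     * non-page part fits in one page (same test as xdr_ressize_check). */
    static inline int ressize_ok(struct svc_rqst_sketch *rqstp, u32 *p)
    {
            struct iovec *vec = &rqstp->rq_res.head[0];
            vec->iov_len = (char *)p - (char *)vec->iov_base;
            rqstp->rq_res.len = vec->iov_len;
            return vec->iov_len <= PAGE_SIZE;
    }

    /* A hypothetical NULL-style procedure: decode no arguments, encode a
     * single status word, validating both directions. */
    static int proc_null_sketch(struct svc_rqst_sketch *rqstp)
    {
            u32 *p = rqstp->rq_arg.head[0].iov_base;
            if (!argsize_ok(rqstp, p))      /* decoded past received data? */
                    return -1;

            p = rqstp->rq_res.head[0].iov_base;
            *p++ = 0;                       /* status: success */
            return ressize_ok(rqstp, p) ? 0 : -1;
    }

In the same spirit, take_page() above covers the case where the reply
outgrows its head page: each extra page is taken from the unused top of
the argument list rather than freshly allocated, which is what keeps
the per-request page count bounded by RPCSVC_MAXPAGES.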