From 8758819af6d985c6a7f2b7c7297b4c90c77561ab Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:23:52 +0100 Subject: NFSv4/RPCSEC_GSS: Ensure that RPC userland upcalls time out correctly if the corresponding userland daemon is not up and running. --- include/linux/sunrpc/rpc_pipe_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index eabb2ebf2289..c3752710c74c 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -27,6 +27,7 @@ struct rpc_inode { #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; struct rpc_pipe_ops *ops; + struct work_struct queue_timeout; }; static inline struct rpc_inode * -- cgit v1.2.3 From 62d3ffc42969a84db8d1e045c79a5dbb7ad5f7ce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:25:51 +0100 Subject: RPCSEC_GSS: Make the upcalls detect if the userland daemon dies while processing a request. 
--- include/linux/sunrpc/rpc_pipe_fs.h | 4 ++- net/sunrpc/auth_gss/auth_gss.c | 28 ++++++++++++++++ net/sunrpc/rpc_pipe.c | 67 ++++++++++++++++++++++++++------------ 3 files changed, 78 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index c3752710c74c..63929349571f 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -14,6 +14,7 @@ struct rpc_pipe_msg { struct rpc_pipe_ops { ssize_t (*upcall)(struct file *, struct rpc_pipe_msg *, char __user *, size_t); ssize_t (*downcall)(struct file *, const char __user *, size_t); + void (*release_pipe)(struct inode *); void (*destroy_msg)(struct rpc_pipe_msg *); }; @@ -21,8 +22,10 @@ struct rpc_inode { struct inode vfs_inode; void *private; struct list_head pipe; + struct list_head in_upcall; int pipelen; int nreaders; + int nwriters; wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; @@ -36,7 +39,6 @@ RPC_I(struct inode *inode) return container_of(inode, struct rpc_inode, vfs_inode); } -extern void rpc_inode_setowner(struct inode *, void *); extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6892bb79d8da..af67459e1cc4 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -482,6 +482,33 @@ err: return err; } +static void +gss_pipe_release(struct inode *inode) +{ + struct rpc_inode *rpci = RPC_I(inode); + struct rpc_clnt *clnt; + struct rpc_auth *auth; + struct gss_auth *gss_auth; + + clnt = rpci->private; + auth = clnt->cl_auth; + gss_auth = container_of(auth, struct gss_auth, rpc_auth); + spin_lock(&gss_auth->lock); + while (!list_empty(&gss_auth->upcalls)) { + struct gss_upcall_msg *gss_msg; + + gss_msg = list_entry(gss_auth->upcalls.next, + struct gss_upcall_msg, list); + 
gss_msg->msg.errno = -EPIPE; + atomic_inc(&gss_msg->count); + __gss_unhash_msg(gss_msg); + spin_unlock(&gss_auth->lock); + gss_release_msg(gss_msg); + spin_lock(&gss_auth->lock); + } + spin_unlock(&gss_auth->lock); +} + void gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) { @@ -774,6 +801,7 @@ static struct rpc_pipe_ops gss_upcall_ops = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, + .release_pipe = gss_pipe_release, }; /* diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 1841ac8cb0da..409e6aef143d 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -50,18 +50,16 @@ __rpc_purge_upcall(struct inode *inode, int err) msg->errno = err; rpci->ops->destroy_msg(msg); } + while (!list_empty(&rpci->in_upcall)) { + msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list); + list_del_init(&msg->list); + msg->errno = err; + rpci->ops->destroy_msg(msg); + } rpci->pipelen = 0; wake_up(&rpci->waitq); } -void -rpc_purge_upcall(struct inode *inode, int err) -{ - down(&inode->i_sem); - __rpc_purge_upcall(inode, err); - up(&inode->i_sem); -} - static void rpc_timeout_upcall_queue(void *data) { @@ -97,20 +95,31 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) return res; } -void -rpc_inode_setowner(struct inode *inode, void *private) +static void +rpc_close_pipes(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); cancel_delayed_work(&rpci->queue_timeout); flush_scheduled_work(); down(&inode->i_sem); - rpci->private = private; - if (!private) + if (rpci->ops != NULL) { + rpci->nreaders = 0; __rpc_purge_upcall(inode, -EPIPE); + rpci->nwriters = 0; + if (rpci->ops->release_pipe) + rpci->ops->release_pipe(inode); + rpci->ops = NULL; + } up(&inode->i_sem); } +static inline void +rpc_inode_setowner(struct inode *inode, void *private) +{ + RPC_I(inode)->private = private; +} + static struct inode * rpc_alloc_inode(struct super_block *sb) { @@ -134,9 +143,11 @@ rpc_pipe_open(struct 
inode *inode, struct file *filp) int res = -ENXIO; down(&inode->i_sem); - if (rpci->private != NULL) { + if (rpci->ops != NULL) { if (filp->f_mode & FMODE_READ) rpci->nreaders ++; + if (filp->f_mode & FMODE_WRITE) + rpci->nwriters ++; res = 0; } up(&inode->i_sem); @@ -149,16 +160,24 @@ rpc_pipe_release(struct inode *inode, struct file *filp) struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); struct rpc_pipe_msg *msg; + down(&inode->i_sem); + if (rpci->ops == NULL) + goto out; msg = (struct rpc_pipe_msg *)filp->private_data; if (msg != NULL) { msg->errno = -EPIPE; + list_del_init(&msg->list); rpci->ops->destroy_msg(msg); } - down(&inode->i_sem); + if (filp->f_mode & FMODE_WRITE) + rpci->nwriters --; if (filp->f_mode & FMODE_READ) rpci->nreaders --; if (!rpci->nreaders) __rpc_purge_upcall(inode, -EPIPE); + if (rpci->ops->release_pipe) + rpci->ops->release_pipe(inode); +out: up(&inode->i_sem); return 0; } @@ -172,7 +191,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) int res = 0; down(&inode->i_sem); - if (!rpci->private) { + if (rpci->ops == NULL) { res = -EPIPE; goto out_unlock; } @@ -182,7 +201,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list); - list_del_init(&msg->list); + list_move(&msg->list, &rpci->in_upcall); rpci->pipelen -= msg->len; filp->private_data = msg; msg->copied = 0; @@ -194,6 +213,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) res = rpci->ops->upcall(filp, msg, buf, len); if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; + list_del_init(&msg->list); rpci->ops->destroy_msg(msg); } out_unlock: @@ -210,7 +230,7 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of down(&inode->i_sem); res = -EPIPE; - if (rpci->private != NULL) + if (rpci->ops != NULL) res = rpci->ops->downcall(filp, buf, len); up(&inode->i_sem); return res; @@ 
-226,7 +246,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) poll_wait(filp, &rpci->waitq, wait); mask = POLLOUT | POLLWRNORM; - if (rpci->private == NULL) + if (rpci->ops == NULL) mask |= POLLERR | POLLHUP; if (!list_empty(&rpci->pipe)) mask |= POLLIN | POLLRDNORM; @@ -242,7 +262,7 @@ rpc_pipe_ioctl(struct inode *ino, struct file *filp, switch (cmd) { case FIONREAD: - if (!rpci->private) + if (rpci->ops == NULL) return -EPIPE; len = rpci->pipelen; if (filp->private_data) { @@ -484,6 +504,7 @@ repeat: do { dentry = dvec[--n]; if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); rpc_inode_setowner(dentry->d_inode, NULL); simple_unlink(dir, dentry); } @@ -563,7 +584,10 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) int error; shrink_dcache_parent(dentry); - rpc_inode_setowner(dentry->d_inode, NULL); + if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); + rpc_inode_setowner(dentry->d_inode, NULL); + } if ((error = simple_rmdir(dir, dentry)) != 0) return error; if (!error) { @@ -715,6 +739,7 @@ rpc_unlink(char *path) } d_drop(dentry); if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); rpc_inode_setowner(dentry->d_inode, NULL); error = simple_unlink(dir, dentry); } @@ -790,6 +815,8 @@ init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) inode_init_once(&rpci->vfs_inode); rpci->private = NULL; rpci->nreaders = 0; + rpci->nwriters = 0; + INIT_LIST_HEAD(&rpci->in_upcall); INIT_LIST_HEAD(&rpci->pipe); rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); -- cgit v1.2.3 From b26ef9fe3e350db44fa93605316de62b742fc13b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:29:20 +0100 Subject: NFSv4: Bugfixes and cleanups for the NFSv4 client name to uid mapper. Includes a fix by Tim Woods to deal with a caching bug in the case where a user and a group share the same numerical id and/or name. 
--- fs/nfs/idmap.c | 372 ++++++++++++++++++++++++---------------------- fs/nfs/nfs4xdr.c | 32 ++-- include/linux/nfs_idmap.h | 17 ++- 3 files changed, 219 insertions(+), 202 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 155eb0debd83..bd1d1335561c 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -52,14 +52,16 @@ #include #define IDMAP_HASH_SZ 128 -#define IDMAP_HASH_TYPE_NAME 0x01 -#define IDMAP_HASH_TYPE_ID 0x02 -#define IDMAP_HASH_TYPE_INSERT 0x04 struct idmap_hashent { - uid_t ih_id; - char ih_name[IDMAP_NAMESZ]; - u_int32_t ih_namelen; + __u32 ih_id; + int ih_namelen; + char ih_name[IDMAP_NAMESZ]; +}; + +struct idmap_hashtable { + __u8 h_type; + struct idmap_hashent h_entries[IDMAP_HASH_SZ]; }; struct idmap { @@ -67,12 +69,10 @@ struct idmap { struct dentry *idmap_dentry; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; - struct nfs_server *idmap_server; - struct semaphore idmap_lock; - struct semaphore idmap_im_lock; - struct semaphore idmap_hash_lock; - struct idmap_hashent idmap_id_hash[IDMAP_HASH_SZ]; - struct idmap_hashent idmap_name_hash[IDMAP_HASH_SZ]; + struct semaphore idmap_lock; /* Serializes upcalls */ + struct semaphore idmap_im_lock; /* Protects the hashtable */ + struct idmap_hashtable idmap_user_hash; + struct idmap_hashtable idmap_group_hash; }; static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, char *, @@ -80,10 +80,7 @@ static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, char *, static ssize_t idmap_pipe_downcall(struct file *, const char *, size_t); void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); -static int validate_ascii(char *, u_int32_t); - -static u_int32_t fnvhash32(void *, u_int32_t); -static int idmap_cache_lookup(struct idmap *, int, char *, u_int32_t *, uid_t *); +static unsigned int fnvhash32(const void *, size_t); static struct rpc_pipe_ops idmap_upcall_ops = { .upcall = idmap_pipe_upcall, @@ -101,20 +98,19 @@ nfs_idmap_new(struct 
nfs_server *server) memset(idmap, 0, sizeof(*idmap)); - idmap->idmap_server = server; - snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), - "%s/idmap", idmap->idmap_server->client->cl_pathname); + "%s/idmap", server->client->cl_pathname); idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, - idmap->idmap_server, &idmap_upcall_ops, 0); + idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) goto err_free; init_MUTEX(&idmap->idmap_lock); init_MUTEX(&idmap->idmap_im_lock); - init_MUTEX(&idmap->idmap_hash_lock); init_waitqueue_head(&idmap->idmap_wq); + idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; + idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; return (idmap); @@ -135,35 +131,102 @@ nfs_idmap_delete(struct nfs_server *server) kfree(idmap); } +/* + * Helper routines for manipulating the hashtable + */ +static inline struct idmap_hashent * +idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) +{ + return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; +} + +static struct idmap_hashent * +idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) +{ + struct idmap_hashent *he = idmap_name_hash(h, name, len); + + if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) + return NULL; + return he; +} + +static inline struct idmap_hashent * +idmap_id_hash(struct idmap_hashtable* h, __u32 id) +{ + return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; +} + +static struct idmap_hashent * +idmap_lookup_id(struct idmap_hashtable *h, __u32 id) +{ + struct idmap_hashent *he = idmap_id_hash(h, id); + if (he->ih_id != id || he->ih_namelen == 0) + return NULL; + return he; +} + +/* + * Routines for allocating new entries in the hashtable. + * For now, we just have 1 entry per bucket, so it's all + * pretty trivial. 
+ */ +static inline struct idmap_hashent * +idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len) +{ + return idmap_name_hash(h, name, len); +} + +static inline struct idmap_hashent * +idmap_alloc_id(struct idmap_hashtable *h, __u32 id) +{ + return idmap_id_hash(h, id); +} + +static void +idmap_update_entry(struct idmap_hashent *he, const char *name, + size_t namelen, __u32 id) +{ + he->ih_id = id; + memcpy(he->ih_name, name, namelen); + he->ih_name[namelen] = '\0'; + he->ih_namelen = namelen; +} + /* * Name -> ID */ -int -nfs_idmap_id(struct nfs_server *server, u_int8_t type, char *name, - u_int namelen, uid_t *id) +static int +nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, + const char *name, size_t namelen, __u32 *id) { struct rpc_pipe_msg msg; - struct idmap *idmap = server->idmap; struct idmap_msg *im; + struct idmap_hashent *he; DECLARE_WAITQUEUE(wq, current); - int ret = -1, hashtype = IDMAP_HASH_TYPE_NAME; - u_int xnamelen = namelen; - - if (idmap == NULL) - return (-1); + int ret = -EIO; im = &idmap->idmap_im; - if (namelen > IDMAP_NAMESZ || namelen == 0) - return (-1); + /* + * String sanity checks + * Note that the userland daemon expects NUL terminated strings + */ + for (;;) { + if (namelen == 0) + return -EINVAL; + if (name[namelen-1] != '\0') + break; + namelen--; + } + if (namelen >= IDMAP_NAMESZ) + return -EINVAL; down(&idmap->idmap_lock); down(&idmap->idmap_im_lock); - if (name[xnamelen - 1] == '\0') - xnamelen--; - - if (idmap_cache_lookup(idmap, hashtype, name, &xnamelen, id) == 0) { + he = idmap_lookup_name(h, name, namelen); + if (he != NULL) { + *id = he->ih_id; ret = 0; goto out; } @@ -171,7 +234,7 @@ nfs_idmap_id(struct nfs_server *server, u_int8_t type, char *name, memset(im, 0, sizeof(*im)); memcpy(im->im_name, name, namelen); - im->im_type = type; + im->im_type = h->h_type; im->im_conv = IDMAP_CONV_NAMETOID; memset(&msg, 0, sizeof(msg)); @@ -191,16 +254,9 @@ nfs_idmap_id(struct nfs_server *server, u_int8_t 
type, char *name, remove_wait_queue(&idmap->idmap_wq, &wq); down(&idmap->idmap_im_lock); - /* - * XXX Race condition here, with testing for status. Go ahead - * and and do the cace lookup anyway. - */ if (im->im_status & IDMAP_STATUS_SUCCESS) { - ret = 0; *id = im->im_id; - - hashtype |= IDMAP_HASH_TYPE_INSERT; - ret = idmap_cache_lookup(idmap, hashtype, name, &xnamelen, id); + ret = 0; } out: @@ -213,35 +269,31 @@ nfs_idmap_id(struct nfs_server *server, u_int8_t type, char *name, /* * ID -> Name */ -int -nfs_idmap_name(struct nfs_server *server, u_int8_t type, uid_t id, - char *name, u_int *namelen) +static int +nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, + __u32 id, char *name) { struct rpc_pipe_msg msg; - struct idmap *idmap = server->idmap; struct idmap_msg *im; + struct idmap_hashent *he; DECLARE_WAITQUEUE(wq, current); - int ret = -1, hashtype = IDMAP_HASH_TYPE_ID; - u_int len; - - if (idmap == NULL) - return (-1); + int ret = -EIO; + unsigned int len; im = &idmap->idmap_im; - if (*namelen < IDMAP_NAMESZ || *namelen == 0) - return (-1); - down(&idmap->idmap_lock); down(&idmap->idmap_im_lock); - if (idmap_cache_lookup(idmap, hashtype, name, namelen, &id) == 0) { - ret = 0; + he = idmap_lookup_id(h, id); + if (he != 0) { + memcpy(name, he->ih_name, he->ih_namelen); + ret = he->ih_namelen; goto out; } memset(im, 0, sizeof(*im)); - im->im_type = type; + im->im_type = h->h_type; im->im_conv = IDMAP_CONV_IDTONAME; im->im_id = id; @@ -256,9 +308,6 @@ nfs_idmap_name(struct nfs_server *server, u_int8_t type, uid_t id, goto out; } - /* - * XXX add timeouts here - */ set_current_state(TASK_UNINTERRUPTIBLE); up(&idmap->idmap_im_lock); schedule(); @@ -267,23 +316,20 @@ nfs_idmap_name(struct nfs_server *server, u_int8_t type, uid_t id, down(&idmap->idmap_im_lock); if (im->im_status & IDMAP_STATUS_SUCCESS) { - if ((len = validate_ascii(im->im_name, IDMAP_NAMESZ)) == -1) + if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) goto out; - ret = 0; 
memcpy(name, im->im_name, len); - *namelen = len; - - hashtype |= IDMAP_HASH_TYPE_INSERT; - ret = idmap_cache_lookup(idmap, hashtype, name, namelen, &id); + ret = len; } out: memset(im, 0, sizeof(*im)); up(&idmap->idmap_im_lock); up(&idmap->idmap_lock); - return (ret); + return ret; } +/* RPC pipefs upcall/downcall routines */ static ssize_t idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, char *dst, size_t buflen) @@ -310,10 +356,12 @@ static ssize_t idmap_pipe_downcall(struct file *filp, const char *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); - struct nfs_server *server = rpci->private; - struct idmap *idmap = server->idmap; + struct idmap *idmap = (struct idmap *)rpci->private; struct idmap_msg im_in, *im = &idmap->idmap_im; - int match = 0, hashtype, badmsg = 0, namelen_in, namelen; + struct idmap_hashtable *h; + struct idmap_hashent *he = NULL; + int namelen_in; + int ret; if (mlen != sizeof(im_in)) return (-ENOSPC); @@ -323,39 +371,66 @@ idmap_pipe_downcall(struct file *filp, const char *src, size_t mlen) down(&idmap->idmap_im_lock); - namelen_in = validate_ascii(im_in.im_name, IDMAP_NAMESZ); - namelen = validate_ascii(im->im_name, IDMAP_NAMESZ); + ret = mlen; + im->im_status = im_in.im_status; + /* If we got an error, terminate now, and wake up pending upcalls */ + if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) { + wake_up(&idmap->idmap_wq); + goto out; + } + + /* Sanity checking of strings */ + ret = -EINVAL; + namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ); + if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) + goto out; - badmsg = !(im_in.im_status & IDMAP_STATUS_SUCCESS) || namelen_in <= 0; + switch (im_in.im_type) { + case IDMAP_TYPE_USER: + h = &idmap->idmap_user_hash; + break; + case IDMAP_TYPE_GROUP: + h = &idmap->idmap_group_hash; + break; + default: + goto out; + } switch (im_in.im_conv) { case IDMAP_CONV_IDTONAME: - match = im->im_id == im_in.im_id; + /* Did we match the current upcall? 
*/ + if (im->im_conv == IDMAP_CONV_IDTONAME + && im->im_type == im_in.im_type + && im->im_id == im_in.im_id) { + /* Yes: copy string, including the terminating '\0' */ + memcpy(im->im_name, im_in.im_name, namelen_in); + im->im_name[namelen_in] = '\0'; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_id(h, im_in.im_id); break; case IDMAP_CONV_NAMETOID: - match = namelen == namelen_in && - memcmp(im->im_name, im_in.im_name, namelen) == 0; + /* Did we match the current upcall? */ + if (im->im_conv == IDMAP_CONV_NAMETOID + && im->im_type == im_in.im_type + && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in + && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) { + im->im_id = im_in.im_id; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_name(h, im_in.im_name, namelen_in); break; default: - badmsg = 1; - break; - } - - match = match && im->im_type == im_in.im_type; - - if (match) { - memcpy(im, &im_in, sizeof(*im)); - wake_up(&idmap->idmap_wq); - } else if (!badmsg) { - hashtype = im_in.im_conv == IDMAP_CONV_IDTONAME ? 
- IDMAP_HASH_TYPE_ID : IDMAP_HASH_TYPE_NAME; - hashtype |= IDMAP_HASH_TYPE_INSERT; - idmap_cache_lookup(idmap, hashtype, im_in.im_name, &namelen_in, - &im_in.im_id); + goto out; } + /* If the entry is valid, also copy it to the cache */ + if (he != NULL) + idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); + ret = mlen; +out: up(&idmap->idmap_im_lock); - return (mlen); + return ret; } void @@ -372,108 +447,51 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) up(&idmap->idmap_im_lock); } -static int -validate_ascii(char *string, u_int32_t len) -{ - int i; - - for (i = 0; i < len; i++) { - if (string[i] == '\0') - break; - - if (string[i] & 0x80) - return (-1); - } - - if (string[i] != '\0') - return (-1); - - return (i); -} - /* * Fowler/Noll/Vo hash * http://www.isthe.com/chongo/tech/comp/fnv/ */ -#define FNV_P_32 ((u_int32_t)0x01000193) /* 16777619 */ -#define FNV_1_32 ((u_int32_t)0x811c9dc5) /* 2166136261 */ +#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */ +#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */ -static u_int32_t -fnvhash32(void *buf, u_int32_t buflen) +static unsigned int fnvhash32(const void *buf, size_t buflen) { - u_char *p, *end = (u_char *)buf + buflen; - u_int32_t hash = FNV_1_32; + const unsigned char *p, *end = (const unsigned char *)buf + buflen; + unsigned int hash = FNV_1_32; for (p = buf; p < end; p++) { hash *= FNV_P_32; - hash ^= (u_int32_t)*p; + hash ^= (unsigned int)*p; } return (hash); } -/* - * ->ih_namelen == 0 indicates negative entry - */ -static int -idmap_cache_lookup(struct idmap *idmap, int type, char *name, u_int32_t *namelen, - uid_t *id) +int nfs_map_name_to_uid(struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) { - u_int32_t hash; - struct idmap_hashent *he = NULL; - int insert = type & IDMAP_HASH_TYPE_INSERT; - int ret = -1; - - /* - * XXX technically, this is not needed, since we will always - * hold idmap_im_lock when altering the hash tables. 
but - * semantically that just hurts. - * - * XXX cache negative responses - */ - down(&idmap->idmap_hash_lock); + struct idmap *idmap = server->idmap; - if (*namelen > IDMAP_NAMESZ || *namelen == 0) - goto out; + return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); +} - if (type & IDMAP_HASH_TYPE_NAME) { - hash = fnvhash32(name, *namelen) % IDMAP_HASH_SZ; - he = &idmap->idmap_name_hash[hash]; - - /* - * Testing he->ih_namelen == *namelen implicitly tests - * namelen != 0, and thus a non-negative entry. - */ - if (!insert && he->ih_namelen == *namelen && - memcmp(he->ih_name, name, *namelen) == 0) { - *id = he->ih_id; - ret = 0; - goto out; - } - } +int nfs_map_group_to_gid(struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) +{ + struct idmap *idmap = server->idmap; - if (type & IDMAP_HASH_TYPE_ID) { - hash = fnvhash32(id, sizeof(*id)) % IDMAP_HASH_SZ; - he = &idmap->idmap_id_hash[hash]; + return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); +} - if (!insert && *id == he->ih_id && he->ih_namelen != 0 && - *namelen >= he->ih_namelen) { - memcpy(name, he->ih_name, he->ih_namelen); - *namelen = he->ih_namelen; - ret = 0; - goto out; - } - } +int nfs_map_uid_to_name(struct nfs_server *server, __u32 uid, char *buf) +{ + struct idmap *idmap = server->idmap; - if (insert && he != NULL) { - he->ih_id = *id; - memcpy(he->ih_name, name, *namelen); - he->ih_namelen = *namelen; - ret = 0; - } + return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); +} +int nfs_map_gid_to_group(struct nfs_server *server, __u32 uid, char *buf) +{ + struct idmap *idmap = server->idmap; - out: - up(&idmap->idmap_hash_lock); - return (ret); + return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); } + diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f2a909d75468..35a28e9e7d21 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -239,10 +239,10 @@ static int encode_attrs(struct xdr_stream *xdr, struct iattr 
*iap, struct nfs_server *server) { - char owner_name[256]; - char owner_group[256]; - int owner_namelen = sizeof(owner_name); - int owner_grouplen = sizeof(owner_group); + char owner_name[IDMAP_NAMESZ]; + char owner_group[IDMAP_NAMESZ]; + int owner_namelen = 0; + int owner_grouplen = 0; uint32_t *p; uint32_t *q; int len; @@ -265,9 +265,8 @@ encode_attrs(struct xdr_stream *xdr, struct iattr *iap, if (iap->ia_valid & ATTR_MODE) len += 4; if (iap->ia_valid & ATTR_UID) { - status = nfs_idmap_name(server, IDMAP_TYPE_USER, - iap->ia_uid, owner_name, &owner_namelen); - if (status < 0) { + owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name); + if (owner_namelen < 0) { printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", iap->ia_uid); /* XXX */ @@ -278,9 +277,8 @@ encode_attrs(struct xdr_stream *xdr, struct iattr *iap, len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { - status = nfs_idmap_name(server, IDMAP_TYPE_GROUP, - iap->ia_gid, owner_group, &owner_grouplen); - if (status < 0) { + owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group); + if (owner_grouplen < 0) { printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", iap->ia_gid); strcpy(owner_group, "nobody"); @@ -1475,10 +1473,9 @@ decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, } READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); - if ((status = nfs_idmap_id(server, IDMAP_TYPE_USER, - (char *)p, dummy32, &nfp->uid)) == -1) { - dprintk("read_attrs: gss_get_num failed!\n"); - /* goto out; */ + if ((status = nfs_map_name_to_uid(server, (char *)p, dummy32, + &nfp->uid)) < 0) { + dprintk("read_attrs: name-to-uid mapping failed!\n"); nfp->uid = -2; } dprintk("read_attrs: uid=%d\n", (int)nfp->uid); @@ -1493,11 +1490,10 @@ decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, } READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); - if ((status = nfs_idmap_id(server, IDMAP_TYPE_GROUP, - (char *)p, 
dummy32, &nfp->gid)) == -1) { - dprintk("read_attrs: gss_get_num failed!\n"); + if ((status = nfs_map_group_to_gid(server, (char *)p, dummy32, + &nfp->gid)) < 0) { + dprintk("read_attrs: group-to-gid mapping failed!\n"); nfp->gid = -2; - /* goto out; */ } dprintk("read_attrs: gid=%d\n", (int)nfp->gid); } diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 248adf707071..c95076e5941b 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -52,18 +52,21 @@ #define IDMAP_STATUS_SUCCESS 0x08 struct idmap_msg { - u_int8_t im_type; - u_int8_t im_conv; - char im_name[IDMAP_NAMESZ]; - u_int32_t im_id; - u_int8_t im_status; + __u8 im_type; + __u8 im_conv; + char im_name[IDMAP_NAMESZ]; + __u32 im_id; + __u8 im_status; }; #ifdef __KERNEL__ void *nfs_idmap_new(struct nfs_server *); void nfs_idmap_delete(struct nfs_server *); -int nfs_idmap_id(struct nfs_server *, u_int8_t, char *, u_int, uid_t *); -int nfs_idmap_name(struct nfs_server *, u_int8_t, uid_t, char *, u_int *); + +int nfs_map_name_to_uid(struct nfs_server *, const char *, size_t, __u32 *); +int nfs_map_group_to_gid(struct nfs_server *, const char *, size_t, __u32 *); +int nfs_map_uid_to_name(struct nfs_server *, __u32, char *); +int nfs_map_gid_to_group(struct nfs_server *, __u32, char *); #endif /* __KERNEL__ */ #endif /* NFS_IDMAP_H */ -- cgit v1.2.3 From 3f13d9aac2492d1f1c543453785f25bff0255aab Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:34:13 +0100 Subject: RPCSEC_GSS: Miscellaneous cleanups of the krb5 code required for the integrity checksumming mode. 
--- include/linux/sunrpc/gss_krb5.h | 20 +---- net/sunrpc/auth_gss/gss_krb5_crypto.c | 44 +++++------ net/sunrpc/auth_gss/gss_krb5_mech.c | 28 +++---- net/sunrpc/auth_gss/gss_krb5_seal.c | 43 +++-------- net/sunrpc/auth_gss/gss_krb5_unseal.c | 134 +++++++++------------------------- 5 files changed, 84 insertions(+), 185 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 8db6d1e13a69..aac2ad4f7d56 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -50,7 +50,6 @@ struct krb5_ctx { struct crypto_tfm *seq; s32 endtime; u32 seq_send; - u32 seq_recv; struct xdr_netobj mech_used; }; @@ -73,7 +72,7 @@ enum seal_alg { SEAL_ALG_DES3KD = 0x0002 }; -#define RSA_MD5_CKSUM_LENGTH 16 +#define KRB5_CKSUM_LENGTH 8 #define CKSUMTYPE_CRC32 0x0001 #define CKSUMTYPE_RSA_MD4 0x0002 @@ -100,16 +99,6 @@ enum seal_alg { #define KG_EMPTY_CCACHE (39756044L) #define KG_NO_CTYPES (39756045L) -#define KV5M_PRINCIPAL (-1760647423L) -#define KV5M_KEYBLOCK (-1760647421L) -#define KV5M_CHECKSUM (-1760647420L) -#define KV5M_ADDRESS (-1760647390L) -#define KV5M_AUTHENTICATOR (-1760647410L) -#define KV5M_AUTH_CONTEXT (-1760647383L) -#define KV5M_AUTHDATA (-1760647414L) -#define KV5M_GSS_OID (-1760647372L) -#define KV5M_GSS_QUEUE (-1760647371L) - /* per Kerberos v5 protocol spec crypto types from the wire. * these get mapped to linux kernel crypto routines. 
*/ @@ -126,14 +115,13 @@ enum seal_alg { #define ENCTYPE_UNKNOWN 0x01ff s32 -krb5_make_checksum(s32 cksumtype, - struct xdr_netobj *input, +krb5_make_checksum(s32 cksumtype, char *header, char *body, int body_len, struct xdr_netobj *cksum); u32 krb5_make_token(struct krb5_ctx *context_handle, int qop_req, - struct xdr_netobj * input_message_buffer, - struct xdr_netobj * output_message_buffer, int toktype); + struct xdr_netobj *input_message_buffer, + struct xdr_netobj *output_message_buffer, int toktype); u32 krb5_read_token(struct krb5_ctx *context_handle, diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index c3a8f548a482..9894d83ddf6f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -122,14 +122,23 @@ out: return(ret); } +void +buf_to_sg(struct scatterlist *sg, char *ptr, int len) { + sg->page = virt_to_page(ptr); + sg->offset = offset_in_page(ptr); + sg->length = len; +} + +/* checksum the plaintext data and the first 8 bytes of the krb5 token header, + * as specified by the rfc: */ s32 -krb5_make_checksum(s32 cksumtype, struct xdr_netobj *input, +krb5_make_checksum(s32 cksumtype, char *header, char *body, int body_len, struct xdr_netobj *cksum) { - s32 ret = -EINVAL; - struct scatterlist sg[1]; - char *cksumname; - struct crypto_tfm *tfm; + char *cksumname; + struct crypto_tfm *tfm = NULL; /* XXX add to ctx? 
*/ + struct scatterlist sg[2]; + u32 code = GSS_S_FAILURE; switch (cksumtype) { case CKSUMTYPE_RSA_MD5: @@ -143,24 +152,17 @@ krb5_make_checksum(s32 cksumtype, struct xdr_netobj *input, if (!(tfm = crypto_alloc_tfm(cksumname, 0))) goto out; cksum->len = crypto_tfm_alg_digestsize(tfm); + if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL) + goto out; - if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL) { - ret = -ENOMEM; - goto out_free_tfm; - } - sg[0].page = virt_to_page(input->data); - sg[0].offset = offset_in_page(input->data); - sg[0].length = input->len; - + buf_to_sg(&sg[0], header, 8); + buf_to_sg(&sg[1], body, body_len); crypto_digest_init(tfm); - crypto_digest_update(tfm, sg, 1); + crypto_digest_update(tfm, sg, 2); crypto_digest_final(tfm, cksum->data); - - ret = 0; - -out_free_tfm: - crypto_free_tfm(tfm); + code = 0; out: - dprintk("RPC: gss_k5cksum: returning %d\n", ret); - return (ret); + if (tfm) + crypto_free_tfm(tfm); + return code; } diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index ce31a89684e4..61282b4d9c3b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -98,7 +98,7 @@ get_key(char **p, char *end, struct crypto_tfm **res) alg_mode = CRYPTO_TFM_MODE_CBC; break; default: - dprintk("RPC: get_key: unsupported algorithm %d", alg); + dprintk("RPC: get_key: unsupported algorithm %d\n", alg); goto out_err_free_key; } if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) @@ -168,7 +168,7 @@ out_err: return GSS_S_FAILURE; } -void +static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; @@ -181,16 +181,16 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { kfree(kctx); } -u32 +static u32 gss_verify_mic_kerberos(struct gss_ctx *ctx, - struct xdr_netobj *signbuf, - struct xdr_netobj *checksum, - u32 *qstate) { + struct xdr_netobj *message, + struct xdr_netobj *mic_token, + u32 *qstate) { u32 maj_stat = 
0; int qop_state; struct krb5_ctx *kctx = ctx->internal_ctx_id; - maj_stat = krb5_read_token(kctx, checksum, signbuf, &qop_state, + maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state, KG_TOK_MIC_MSG); if (!maj_stat && qop_state) *qstate = qop_state; @@ -199,21 +199,17 @@ gss_verify_mic_kerberos(struct gss_ctx *ctx, return maj_stat; } -u32 +static u32 gss_get_mic_kerberos(struct gss_ctx *ctx, u32 qop, - struct xdr_netobj *message_buffer, - struct xdr_netobj *message_token) { + struct xdr_netobj *message, + struct xdr_netobj *mic_token) { u32 err = 0; struct krb5_ctx *kctx = ctx->internal_ctx_id; - if (!message_buffer->data) return GSS_S_FAILURE; - - dprintk("RPC: gss_get_mic_kerberos:" - " message_buffer->len %d\n",message_buffer->len); + if (!message->data) return GSS_S_FAILURE; - err = krb5_make_token(kctx, qop, message_buffer, - message_token, KG_TOK_MIC_MSG); + err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 280d82d7c6dc..e664d3ea98ce 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -63,14 +63,13 @@ #include #include #include +#include #include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define CKSUM_SIZE 8 - static inline int gss_krb5_padding(int blocksize, int length) { /* Most of the code is block-size independent but in practice we @@ -79,29 +78,6 @@ gss_krb5_padding(int blocksize, int length) { return 8 - (length & 7); } -/* checksum the plaintext data and the first 8 bytes of the krb5 token header, - * as specified by the rfc: */ -static u32 -compute_checksum(s32 checksum_type, char *header, char *body, int body_len, - struct xdr_netobj *md5cksum) { - char *data_ptr; - struct xdr_netobj plaind; - u32 code = GSS_S_FAILURE; - - if (!(data_ptr = kmalloc(8 + body_len, GFP_KERNEL))) - goto out; - memcpy(data_ptr, 
header, 8); - memcpy(data_ptr + 8, body, body_len); - plaind.len = 8 + body_len; - plaind.data = data_ptr; - code = krb5_make_checksum(checksum_type, &plaind, md5cksum); - kfree(data_ptr); - code = 0; - -out: - return code; -} - u32 krb5_make_token(struct krb5_ctx *ctx, int qop_req, struct xdr_netobj * text, struct xdr_netobj * token, @@ -113,10 +89,12 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, unsigned char *ptr, *krb5_hdr, *msg_start; s32 now; - dprintk("RPC: gss_krb5_seal"); + dprintk("RPC: gss_krb5_seal\n"); now = jiffies; + token->data = NULL; + if (qop_req != 0) goto out_err; @@ -167,8 +145,8 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, memset(msg_start + blocksize + text->len, pad, pad); - if (compute_checksum(checksum_type, krb5_hdr, msg_start, - tmsglen, &md5cksum)) + if (krb5_make_checksum(checksum_type, krb5_hdr, msg_start, + tmsglen, &md5cksum)) goto out_err; if (krb5_encrypt(ctx->enc, NULL, msg_start, msg_start, @@ -176,8 +154,8 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, goto out_err; } else { /* Sign only. 
*/ - if (compute_checksum(checksum_type, krb5_hdr, text->data, - text->len, &md5cksum)) + if (krb5_make_checksum(checksum_type, krb5_hdr, text->data, + text->len, &md5cksum)) goto out_err; } @@ -187,10 +165,11 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, md5cksum.data, md5cksum.len)) goto out_err; memcpy(krb5_hdr + 16, - md5cksum.data + md5cksum.len - CKSUM_SIZE, CKSUM_SIZE); + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); dprintk("make_seal_token: cksum data: \n"); - print_hexl((u32 *) (krb5_hdr + 16), CKSUM_SIZE, 0); + print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); break; default: BUG(); diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 836c683777f2..8b2795d701db 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -68,7 +68,12 @@ #endif -/* message_buffer is an input if MIC and an output if WRAP. */ +/* message_buffer is an input if toktype is MIC and an output if it is WRAP: + * If toktype is MIC: read_token is a mic token, and message_buffer is the + * data that the mic was supposedly taken over. + * If toktype is WRAP: read_token is a wrap token, and message_buffer is used + * to return the decrypted data. 
+ */ u32 krb5_read_token(struct krb5_ctx *ctx, @@ -76,20 +81,13 @@ krb5_read_token(struct krb5_ctx *ctx, struct xdr_netobj *message_buffer, int *qop_state, int toktype) { - s32 code; - int tmsglen = 0; - int conflen = 0; int signalg; int sealalg; struct xdr_netobj token = {.len = 0, .data = NULL}; s32 checksum_type; - struct xdr_netobj cksum; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; - struct xdr_netobj plaind; - char *data_ptr; s32 now; unsigned char *plain = NULL; - int cksum_len = 0; int plainlen = 0; int direction; s32 seqnum; @@ -97,10 +95,9 @@ krb5_read_token(struct krb5_ctx *ctx, int bodysize; u32 ret = GSS_S_DEFECTIVE_TOKEN; - dprintk("RPC: krb5_read_token\n"); + dprintk("RPC: krb5_read_token\n"); - if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used, - &bodysize, &ptr, toktype, + if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, toktype, read_token->len)) goto out; @@ -138,40 +135,22 @@ krb5_read_token(struct krb5_ctx *ctx, signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) goto out; - /* starting with a single alg */ - switch (signalg) { - case SGN_ALG_DES_MAC_MD5: - cksum_len = 8; - break; - default: - goto out; - } - - if (toktype == KG_TOK_WRAP_MSG) - tmsglen = bodysize - (14 + cksum_len); - - /* get the token parameters */ - - /* decode the message, if WRAP */ - if (toktype == KG_TOK_WRAP_MSG) { - dprintk("RPC: krb5_read_token KG_TOK_WRAP_MSG\n"); + int conflen = crypto_tfm_alg_blocksize(ctx->enc); + int padlen; - plain = kmalloc(tmsglen, GFP_KERNEL); - ret = GSS_S_FAILURE; - if (plain == NULL) - goto out; + plainlen = bodysize - (14 + KRB5_CKSUM_LENGTH); + plain = ptr + 14 + KRB5_CKSUM_LENGTH; - code = krb5_decrypt(ctx->enc, NULL, - ptr + 14 + cksum_len, plain, - tmsglen); - if (code) + ret = krb5_decrypt(ctx->enc, NULL, plain, plain, plainlen); + if (ret) goto out; - plainlen = tmsglen; - - conflen = crypto_tfm_alg_blocksize(ctx->enc); - token.len = tmsglen - conflen - plain[tmsglen - 1]; + ret = GSS_S_FAILURE; + padlen = 
plain[plainlen -1]; + if ((padlen < 1) || (padlen > 8)) + goto out; + token.len = plainlen - conflen - padlen; if (token.len) { token.data = kmalloc(token.len, GFP_KERNEL); @@ -181,15 +160,13 @@ krb5_read_token(struct krb5_ctx *ctx, } } else if (toktype == KG_TOK_MIC_MSG) { - dprintk("RPC: krb5_read_token KG_TOK_MIC_MSG\n"); token = *message_buffer; plain = token.data; plainlen = token.len; } else { - token.len = 0; - token.data = NULL; - plain = token.data; - plainlen = token.len; + printk("RPC: bad toktype in krb5_read_token"); + ret = GSS_S_FAILURE; + goto out; } dprintk("RPC krb5_read_token: token.len %d plainlen %d\n", token.len, @@ -209,67 +186,26 @@ krb5_read_token(struct krb5_ctx *ctx, switch (signalg) { case SGN_ALG_DES_MAC_MD5: - dprintk("RPC krb5_read_token SGN_ALG_DES_MAC_MD5\n"); - /* compute the checksum of the message. - * 8 = bytes of token body to be checksummed according to spec - */ - - data_ptr = kmalloc(8 + plainlen, GFP_KERNEL); - ret = GSS_S_FAILURE; - if (!data_ptr) + ret = krb5_make_checksum(checksum_type, ptr - 2, plain, + plainlen, &md5cksum); + if (ret) goto out; - memcpy(data_ptr, ptr - 2, 8); - memcpy(data_ptr + 8, plain, plainlen); - - plaind.len = 8 + plainlen; - plaind.data = data_ptr; - - code = krb5_make_checksum(checksum_type, - &plaind, &md5cksum); - - kfree(data_ptr); - - if (code) + ret = krb5_encrypt(ctx->seq, NULL, md5cksum.data, + md5cksum.data, 16); + if (ret) goto out; - code = krb5_encrypt(ctx->seq, NULL, md5cksum.data, - md5cksum.data, 16); - if (code) + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { + ret = GSS_S_BAD_SIG; goto out; - - if (signalg == 0) - cksum.len = 8; - else - cksum.len = 16; - cksum.data = md5cksum.data + 16 - cksum.len; - - dprintk - ("RPC: krb5_read_token: memcmp digest cksum.len %d:\n", - cksum.len); - dprintk(" md5cksum.data\n"); - print_hexl((u32 *) md5cksum.data, 16, 0); - dprintk(" cksum.data:\n"); - print_hexl((u32 *) cksum.data, cksum.len, 0); - { - u32 *p; - - (u8 *) p = ptr + 14; - 
dprintk(" ptr+14:\n"); - print_hexl(p, cksum.len, 0); } - - code = memcmp(cksum.data, ptr + 14, cksum.len); break; default: ret = GSS_S_DEFECTIVE_TOKEN; goto out; } - ret = GSS_S_BAD_SIG; - if (code) - goto out; - /* it got through unscathed. Make sure the context is unexpired */ if (toktype == KG_TOK_WRAP_MSG) @@ -287,8 +223,8 @@ krb5_read_token(struct krb5_ctx *ctx, /* do sequencing checks */ ret = GSS_S_BAD_SIG; - if ((code = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, - &seqnum))) + if ((ret = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, + &seqnum))) goto out; if ((ctx->initiate && direction != 0xff) || @@ -298,9 +234,7 @@ krb5_read_token(struct krb5_ctx *ctx, ret = GSS_S_COMPLETE; out: if (md5cksum.data) kfree(md5cksum.data); - if (toktype == KG_TOK_WRAP_MSG) { - if (plain) kfree(plain); - if (ret && token.data) kfree(token.data); - } + if ((toktype == KG_TOK_WRAP_MSG) && ret && token.data) + kfree(token.data); return ret; } -- cgit v1.2.3 From 955ac3514fc5eb754827bd69edbe09012b95beb8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:44:01 +0100 Subject: RPCSEC_GSS: Client-side only support for rpcsec_gss integrity protection. Since this requires checksumming an entire request, instead of just the header, and since the request may include, for example, pages with write data, we modify the gss_api routines to pass xdr_bufs instead of xdr_netobjs where necessary. We add rpcauth_wrap_req and rpcauth_unwrap_resp to rpcauth.c, wrappers for the new rpc cred ops crwrap_req and crunwrap_req, which are called just before encoding, and just after decoding, respectively. 
--- include/linux/sunrpc/auth.h | 6 + include/linux/sunrpc/gss_api.h | 9 +- include/linux/sunrpc/gss_krb5.h | 6 +- include/linux/sunrpc/xdr.h | 4 + net/sunrpc/auth.c | 29 +++++ net/sunrpc/auth_gss/auth_gss.c | 231 ++++++++++++++++++++++++++++------ net/sunrpc/auth_gss/gss_krb5_crypto.c | 51 ++++++-- net/sunrpc/auth_gss/gss_krb5_mech.c | 8 +- net/sunrpc/auth_gss/gss_krb5_seal.c | 23 +--- net/sunrpc/auth_gss/gss_krb5_unseal.c | 58 ++------- net/sunrpc/auth_gss/gss_mech_switch.c | 4 +- net/sunrpc/clnt.c | 6 +- net/sunrpc/sunrpc_syms.c | 3 + net/sunrpc/xdr.c | 144 ++++++++++++++++++++- 14 files changed, 450 insertions(+), 132 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 70cc3360e608..a68f18bf0a46 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -102,6 +102,10 @@ struct rpc_credops { u32 * (*crmarshal)(struct rpc_task *, u32 *, int); int (*crrefresh)(struct rpc_task *); u32 * (*crvalidate)(struct rpc_task *, u32 *); + int (*crwrap_req)(struct rpc_task *, kxdrproc_t, + void *, u32 *, void *); + int (*crunwrap_resp)(struct rpc_task *, kxdrproc_t, + void *, u32 *, void *); }; extern struct rpc_authops authunix_ops; @@ -124,6 +128,8 @@ void put_rpccred(struct rpc_cred *); void rpcauth_unbindcred(struct rpc_task *); u32 * rpcauth_marshcred(struct rpc_task *, u32 *); u32 * rpcauth_checkverf(struct rpc_task *, u32 *); +int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, u32 *data, void *obj); +int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, u32 *data, void *obj); int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 35988e7bfb77..cbb60ac22fd4 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -16,6 +16,7 @@ #ifdef __KERNEL__ 
#include +#include /* The mechanism-independent gss-api context: */ struct gss_ctx { @@ -39,11 +40,11 @@ u32 gss_import_sec_context( u32 gss_get_mic( struct gss_ctx *ctx_id, u32 qop, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token); u32 gss_verify_mic( struct gss_ctx *ctx_id, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); u32 gss_delete_sec_context( @@ -95,11 +96,11 @@ struct gss_api_ops { u32 (*gss_get_mic)( struct gss_ctx *ctx_id, u32 qop, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token); u32 (*gss_verify_mic)( struct gss_ctx *ctx_id, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); void (*gss_delete_sec_context)( diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index aac2ad4f7d56..9616746407f3 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -115,18 +115,18 @@ enum seal_alg { #define ENCTYPE_UNKNOWN 0x01ff s32 -krb5_make_checksum(s32 cksumtype, char *header, char *body, int body_len, +krb5_make_checksum(s32 cksumtype, char *header, struct xdr_buf *body, struct xdr_netobj *cksum); u32 krb5_make_token(struct krb5_ctx *context_handle, int qop_req, - struct xdr_netobj *input_message_buffer, + struct xdr_buf *input_message_buffer, struct xdr_netobj *output_message_buffer, int toktype); u32 krb5_read_token(struct krb5_ctx *context_handle, struct xdr_netobj *input_token_buffer, - struct xdr_netobj *message_buffer, + struct xdr_buf *message_buffer, int *qop_state, int toktype); u32 diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2c6f76d1cc14..8082a0029100 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -141,6 +141,10 @@ void xdr_shift_iovec(struct iovec *, int, size_t); extern int xdr_kmap(struct iovec *, struct xdr_buf *, size_t); extern void xdr_kunmap(struct xdr_buf *, 
size_t); extern void xdr_shift_buf(struct xdr_buf *, size_t); +extern void _copy_from_pages(char *, struct page **, size_t, size_t); +extern void xdr_buf_from_iov(struct iovec *, struct xdr_buf *); +extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int); +extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int); /* * Helper structure for copying from an sk_buff. diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index f1a73646e6ed..a89b30cadff3 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -339,6 +339,35 @@ rpcauth_checkverf(struct rpc_task *task, u32 *p) return cred->cr_ops->crvalidate(task, p); } +int +rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, + u32 *data, void *obj) +{ + struct rpc_cred *cred = task->tk_msg.rpc_cred; + + dprintk("RPC: %4d using %s cred %p to wrap rpc data\n", + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); + if (cred->cr_ops->crwrap_req) + return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); + /* By default, we encode the arguments normally. */ + return encode(rqstp, data, obj); +} + +int +rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, + u32 *data, void *obj) +{ + struct rpc_cred *cred = task->tk_msg.rpc_cred; + + dprintk("RPC: %4d using %s cred %p to unwrap rpc data\n", + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); + if (cred->cr_ops->crunwrap_resp) + return cred->cr_ops->crunwrap_resp(task, decode, rqstp, + data, obj); + /* By default, we decode the arguments normally. 
*/ + return decode(rqstp, data, obj); +} + int rpcauth_refreshcred(struct rpc_task *task) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 61618c6bed5e..1a4b9f504aa2 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -51,6 +51,7 @@ #include #include #include +#include #include static struct rpc_authops authgss_ops; @@ -65,7 +66,9 @@ static struct rpc_credops gss_credops; #define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */ #define GSS_CRED_SLACK 1024 /* XXX: unused */ -#define GSS_VERF_SLACK 48 /* length of a krb5 verifier.*/ +/* length of a krb5 verifier (48), plus data added before arguments when + * using integrity (two 4-byte integers): */ +#define GSS_VERF_SLACK 56 /* XXX this define must match the gssd define * as it is passed to gssd to signal the use of @@ -669,21 +672,14 @@ gss_marshal(struct rpc_task *task, u32 *p, int ruid) struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); u32 *cred_len; struct rpc_rqst *req = task->tk_rqstp; - struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = clnt->cl_xprt; - u32 *verfbase = req->rq_svec[0].iov_base; u32 maj_stat = 0; - struct xdr_netobj bufin,bufout; + struct xdr_netobj mic; + struct iovec iov; + struct xdr_buf verf_buf; u32 service; dprintk("RPC: gss_marshal\n"); - /* We compute the checksum for the verifier over the xdr-encoded bytes - * starting with the xid (which verfbase points to) and ending at - * the end of the credential. */ - if (xprt->stream) - verfbase++; /* See clnt.c:call_header() */ - *p++ = htonl(RPC_AUTH_GSS); cred_len = p++; @@ -704,24 +700,28 @@ gss_marshal(struct rpc_task *task, u32 *p, int ruid) p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); *cred_len = htonl((p - (cred_len + 1)) << 2); - /* Marshal verifier. 
*/ - bufin.data = (u8 *)verfbase; - bufin.len = (p - verfbase) << 2; + /* We compute the checksum for the verifier over the xdr-encoded bytes + * starting with the xid and ending at the end of the credential: */ + iov.iov_base = req->rq_snd_buf.head[0].iov_base; + if (task->tk_client->cl_xprt->stream) + /* See clnt.c:call_header() */ + iov.iov_base += 4; + iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; + xdr_buf_from_iov(&iov, &verf_buf); /* set verifier flavor*/ *p++ = htonl(RPC_AUTH_GSS); - bufout.data = (u8 *)(p + 1); + mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, - &bufin, &bufout); + &verf_buf, &mic); if(maj_stat != 0){ - printk("gss_marshal: gss_get_mic FAILED (%d)\n", - maj_stat); + printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); goto out_put_ctx; } - *p++ = htonl(bufout.len); - p += XDR_QUADLEN(bufout.len); + *p++ = htonl(mic.len); + p += XDR_QUADLEN(mic.len); gss_put_ctx(ctx); return p; out_put_ctx: @@ -749,35 +749,45 @@ static u32 * gss_validate(struct rpc_task *task, u32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, + gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); u32 seq, qop_state; - struct xdr_netobj bufin; - struct xdr_netobj bufout; + struct iovec iov; + struct xdr_buf verf_buf; + struct xdr_netobj mic; u32 flav,len; + u32 service; dprintk("RPC: gss_validate\n"); flav = ntohl(*p++); - if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) { - printk("RPC: giant verf size: %ld\n", (unsigned long) len); + if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) goto out_bad; - } - dprintk("RPC: gss_validate: verifier flavor %d, len %d\n", flav, len); - - if (flav != RPC_AUTH_GSS) { - printk("RPC: bad verf flavor: %ld\n", (unsigned long)flav); + if (flav != RPC_AUTH_GSS) goto out_bad; - } seq = htonl(task->tk_gss_seqno); - bufin.data = (u8 *) &seq; - bufin.len = sizeof(seq); - bufout.data = (u8 *) p; - bufout.len = len; - - if 
(gss_verify_mic(ctx->gc_gss_ctx, &bufin, &bufout, &qop_state) != 0) - goto out_bad; - task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; - dprintk("RPC: GSS gss_validate: gss_verify_mic succeeded.\n"); + iov.iov_base = &seq; + iov.iov_len = sizeof(seq); + xdr_buf_from_iov(&iov, &verf_buf); + mic.data = (u8 *)p; + mic.len = len; + + if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state)) + goto out_bad; + service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); + switch (service) { + case RPC_GSS_SVC_NONE: + /* verifier data, flavor, length: */ + task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; + break; + case RPC_GSS_SVC_INTEGRITY: + /* verifier data, flavor, length, length, sequence number: */ + task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4; + break; + default: + goto out_bad; + } gss_put_ctx(ctx); return p + XDR_QUADLEN(len); out_bad: @@ -785,6 +795,147 @@ out_bad: return NULL; } +static int +gss_wrap_req(struct rpc_task *task, + kxdrproc_t encode, void *rqstp, u32 *p, void *obj) +{ + struct rpc_rqst *req = (struct rpc_rqst *)rqstp; + struct xdr_buf *snd_buf = &req->rq_snd_buf; + struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, + gc_base); + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + u32 *integ_len = NULL; + int status = -EIO; + u32 maj_stat = 0; + struct xdr_buf integ_buf; + struct xdr_netobj mic; + u32 service; + u32 offset, *q; + struct iovec *iov; + + dprintk("RPC: gss_wrap_body\n"); + BUG_ON(!ctx); + if (ctx->gc_proc != RPC_GSS_PROC_DATA) { + /* The spec seems a little ambiguous here, but I think that not + * wrapping context destruction requests makes the most sense. 
+ */ + status = encode(rqstp, p, obj); + goto out; + } + service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); + switch (service) { + case RPC_GSS_SVC_NONE: + status = encode(rqstp, p, obj); + goto out; + case RPC_GSS_SVC_INTEGRITY: + + integ_len = p++; + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; + *p++ = htonl(task->tk_gss_seqno); + + status = encode(rqstp, p, obj); + if (status) + goto out; + + if (xdr_buf_subsegment(snd_buf, &integ_buf, + offset, snd_buf->len - offset)) + goto out; + *integ_len = htonl(integ_buf.len); + + /* guess whether we're in the head or the tail: */ + if (snd_buf->page_len || snd_buf->tail[0].iov_len) + iov = snd_buf->tail; + else + iov = snd_buf->head; + p = iov->iov_base + iov->iov_len; + mic.data = (u8 *)(p + 1); + + maj_stat = gss_get_mic(ctx->gc_gss_ctx, + GSS_C_QOP_DEFAULT, &integ_buf, &mic); + status = -EIO; /* XXX? */ + if (maj_stat) + goto out; + q = p; + *q++ = htonl(mic.len); + q += XDR_QUADLEN(mic.len); + + offset = (u8 *)q - (u8 *)p; + iov->iov_len += offset; + snd_buf->len += offset; + break; + case RPC_GSS_SVC_PRIVACY: + default: + goto out; + } + status = 0; +out: + gss_put_ctx(ctx); + dprintk("RPC: gss_wrap_req returning %d\n", status); + return status; +} + +static int +gss_unwrap_resp(struct rpc_task *task, + kxdrproc_t decode, void *rqstp, u32 *p, void *obj) +{ + struct rpc_rqst *req = (struct rpc_rqst *)rqstp; + struct xdr_buf *rcv_buf = &req->rq_rcv_buf; + struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, + gc_base); + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + struct xdr_buf integ_buf; + struct xdr_netobj mic; + int status = -EIO; + u32 maj_stat = 0; + u32 service; + u32 data_offset, mic_offset; + u32 integ_len; + + BUG_ON(!ctx); + + if (ctx->gc_proc != RPC_GSS_PROC_DATA) + goto out_decode; + service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); + switch (service) { + case RPC_GSS_SVC_NONE: + goto out_decode; + case 
RPC_GSS_SVC_INTEGRITY: + integ_len = ntohl(*p++); + if (integ_len & 3) + goto out; + data_offset = (u8 *)p - (u8 *)rcv_buf->head[0].iov_base; + mic_offset = integ_len + data_offset; + if (mic_offset > rcv_buf->len) + goto out; + if (ntohl(*p++) != task->tk_gss_seqno) + goto out; + + if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, + mic_offset - data_offset)) + goto out; + + if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) + goto out; + + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, + &mic, NULL); + if (maj_stat != GSS_S_COMPLETE) + goto out; + break; + case RPC_GSS_SVC_PRIVACY: + default: + goto out; + } +out_decode: + status = decode(rqstp, p, obj); +out: + gss_put_ctx(ctx); + dprintk("RPC: gss_unwrap_resp returning %d\n", status); + return status; +} + static struct rpc_authops authgss_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_GSS, @@ -802,6 +953,8 @@ static struct rpc_credops gss_credops = { .crmarshal = gss_marshal, .crrefresh = gss_refresh, .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, }; static struct rpc_pipe_ops gss_upcall_ops = { diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 9894d83ddf6f..5a5f859ad628 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #ifdef RPC_DEBUG @@ -57,7 +58,7 @@ krb5_encrypt( struct scatterlist sg[1]; u8 local_iv[16] = {0}; - dprintk("RPC: gss_k5encrypt: TOP in %p out %p\nin data:\n", out, in); + dprintk("RPC: krb5_encrypt: input data:\n"); print_hexl((u32 *)in, length, 0); if (length % crypto_tfm_alg_blocksize(tfm) != 0) @@ -79,8 +80,10 @@ krb5_encrypt( ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv); + dprintk("RPC: krb5_encrypt: output data:\n"); + print_hexl((u32 *)out, length, 0); out: - dprintk("gss_k5encrypt returns %d\n",ret); + dprintk("krb5_encrypt returns %d\n",ret); 
return(ret); } @@ -96,8 +99,8 @@ krb5_decrypt( struct scatterlist sg[1]; u8 local_iv[16] = {0}; - dprintk("RPC: gss_k5decrypt: TOP in %p out %p\nin data:\n", in, out); - print_hexl((u32 *)in,length,0); + dprintk("RPC: krb5_decrypt: input data:\n"); + print_hexl((u32 *)in, length, 0); if (length % crypto_tfm_alg_blocksize(tfm) != 0) goto out; @@ -117,6 +120,8 @@ krb5_decrypt( ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv); + dprintk("RPC: krb5_decrypt: output_data:\n"); + print_hexl((u32 *)out, length, 0); out: dprintk("gss_k5decrypt returns %d\n",ret); return(ret); @@ -132,13 +137,15 @@ buf_to_sg(struct scatterlist *sg, char *ptr, int len) { /* checksum the plaintext data and the first 8 bytes of the krb5 token header, * as specified by the rfc: */ s32 -krb5_make_checksum(s32 cksumtype, char *header, char *body, int body_len, +krb5_make_checksum(s32 cksumtype, char *header, struct xdr_buf *body, struct xdr_netobj *cksum) { char *cksumname; struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ - struct scatterlist sg[2]; + struct scatterlist sg[1]; u32 code = GSS_S_FAILURE; + int len, thislen, offset; + int i; switch (cksumtype) { case CKSUMTYPE_RSA_MD5: @@ -155,10 +162,36 @@ krb5_make_checksum(s32 cksumtype, char *header, char *body, int body_len, if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL) goto out; - buf_to_sg(&sg[0], header, 8); - buf_to_sg(&sg[1], body, body_len); crypto_digest_init(tfm); - crypto_digest_update(tfm, sg, 2); + buf_to_sg(sg, header, 8); + crypto_digest_update(tfm, sg, 1); + if (body->head[0].iov_len) { + buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len); + crypto_digest_update(tfm, sg, 1); + } + + len = body->page_len; + offset = body->page_base; + i = 0; + while (len) { + sg->page = body->pages[i]; + sg->offset = offset; + offset = 0; + if (PAGE_SIZE > len) + thislen = len; + else + thislen = PAGE_SIZE; + sg->length = thislen; + kmap(sg->page); /* XXX kmap_atomic? 
*/ + crypto_digest_update(tfm, sg, 1); + kunmap(sg->page); + len -= thislen; + i++; + } + if (body->tail[0].iov_len) { + buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len); + crypto_digest_update(tfm, sg, 1); + } crypto_digest_final(tfm, cksum->data); code = 0; out: diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 61282b4d9c3b..9913dac0f415 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -183,7 +183,7 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { static u32 gss_verify_mic_kerberos(struct gss_ctx *ctx, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate) { u32 maj_stat = 0; @@ -202,13 +202,11 @@ gss_verify_mic_kerberos(struct gss_ctx *ctx, static u32 gss_get_mic_kerberos(struct gss_ctx *ctx, u32 qop, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token) { u32 err = 0; struct krb5_ctx *kctx = ctx->internal_ctx_id; - if (!message->data) return GSS_S_FAILURE; - err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); @@ -233,12 +231,14 @@ static int __init init_kerberos_module(void) printk("Failed to register kerberos gss mechanism!\n"); gm = gss_mech_get_by_OID(&gss_mech_krb5_oid); gss_register_triple(RPC_AUTH_GSS_KRB5 , gm, 0, RPC_GSS_SVC_NONE); + gss_register_triple(RPC_AUTH_GSS_KRB5I, gm, 0, RPC_GSS_SVC_INTEGRITY); gss_mech_put(gm); return 0; } static void __exit cleanup_kerberos_module(void) { + gss_unregister_triple(RPC_AUTH_GSS_KRB5I); gss_unregister_triple(RPC_AUTH_GSS_KRB5); } diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index eaf19d7c8e25..52d4e78e117f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -80,7 +80,7 @@ gss_krb5_padding(int blocksize, int length) { u32 krb5_make_token(struct krb5_ctx *ctx, int qop_req, - 
struct xdr_netobj * text, struct xdr_netobj * token, + struct xdr_buf *text, struct xdr_netobj *token, int toktype) { s32 checksum_type; @@ -134,24 +134,11 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg); if (toktype == KG_TOK_WRAP_MSG) { - unsigned char pad = gss_krb5_padding(blocksize, text->len); - - get_random_bytes(msg_start, blocksize); /* "confounder" */ - memcpy(msg_start + blocksize, text->data, text->len); - - memset(msg_start + blocksize + text->len, pad, pad); - - if (krb5_make_checksum(checksum_type, krb5_hdr, msg_start, - tmsglen, &md5cksum)) - goto out_err; - - if (krb5_encrypt(ctx->enc, NULL, msg_start, msg_start, - tmsglen)) - goto out_err; - + /* XXX removing support for now */ + goto out_err; } else { /* Sign only. */ - if (krb5_make_checksum(checksum_type, krb5_hdr, text->data, - text->len, &md5cksum)) + if (krb5_make_checksum(checksum_type, krb5_hdr, text, + &md5cksum)) goto out_err; } diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 8b2795d701db..0e1c7f70f841 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -75,20 +75,19 @@ * to return the decrypted data. */ +/* XXX will need to change prototype and/or just split into a separate function + * when we add privacy (because read_token will be in pages too). 
*/ u32 krb5_read_token(struct krb5_ctx *ctx, struct xdr_netobj *read_token, - struct xdr_netobj *message_buffer, + struct xdr_buf *message_buffer, int *qop_state, int toktype) { int signalg; int sealalg; - struct xdr_netobj token = {.len = 0, .data = NULL}; s32 checksum_type; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; s32 now; - unsigned char *plain = NULL; - int plainlen = 0; int direction; s32 seqnum; unsigned char *ptr = (unsigned char *)read_token->data; @@ -100,10 +99,11 @@ krb5_read_token(struct krb5_ctx *ctx, if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, toktype, read_token->len)) goto out; + /* XXX sanity-check bodysize?? */ if (toktype == KG_TOK_WRAP_MSG) { - message_buffer->len = 0; - message_buffer->data = NULL; + /* XXX gone */ + goto out; } /* get the sign and seal algorithms */ @@ -135,43 +135,6 @@ krb5_read_token(struct krb5_ctx *ctx, signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) goto out; - if (toktype == KG_TOK_WRAP_MSG) { - int conflen = crypto_tfm_alg_blocksize(ctx->enc); - int padlen; - - plainlen = bodysize - (14 + KRB5_CKSUM_LENGTH); - plain = ptr + 14 + KRB5_CKSUM_LENGTH; - - ret = krb5_decrypt(ctx->enc, NULL, plain, plain, plainlen); - if (ret) - goto out; - - ret = GSS_S_FAILURE; - padlen = plain[plainlen -1]; - if ((padlen < 1) || (padlen > 8)) - goto out; - token.len = plainlen - conflen - padlen; - - if (token.len) { - token.data = kmalloc(token.len, GFP_KERNEL); - if (token.data == NULL) - goto out; - memcpy(token.data, plain + conflen, token.len); - } - - } else if (toktype == KG_TOK_MIC_MSG) { - token = *message_buffer; - plain = token.data; - plainlen = token.len; - } else { - printk("RPC: bad toktype in krb5_read_token"); - ret = GSS_S_FAILURE; - goto out; - } - - dprintk("RPC krb5_read_token: token.len %d plainlen %d\n", token.len, - plainlen); - /* compute the checksum of the message */ /* initialize the the cksum */ @@ -186,8 +149,8 @@ krb5_read_token(struct krb5_ctx *ctx, switch (signalg) { case 
SGN_ALG_DES_MAC_MD5: - ret = krb5_make_checksum(checksum_type, ptr - 2, plain, - plainlen, &md5cksum); + ret = krb5_make_checksum(checksum_type, ptr - 2, + message_buffer, &md5cksum); if (ret) goto out; @@ -208,9 +171,6 @@ krb5_read_token(struct krb5_ctx *ctx, /* it got through unscathed. Make sure the context is unexpired */ - if (toktype == KG_TOK_WRAP_MSG) - *message_buffer = token; - if (qop_state) *qop_state = GSS_C_QOP_DEFAULT; @@ -234,7 +194,5 @@ krb5_read_token(struct krb5_ctx *ctx, ret = GSS_S_COMPLETE; out: if (md5cksum.data) kfree(md5cksum.data); - if ((toktype == KG_TOK_WRAP_MSG) && ret && token.data) - kfree(token.data); return ret; } diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index b384cae37052..b360460defcd 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -196,7 +196,7 @@ gss_import_sec_context(struct xdr_netobj *input_token, u32 gss_get_mic(struct gss_ctx *context_handle, u32 qop, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token) { return context_handle->mech_type->gm_ops @@ -210,7 +210,7 @@ gss_get_mic(struct gss_ctx *context_handle, u32 gss_verify_mic(struct gss_ctx *context_handle, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index badaa121f29c..cc4bfb201807 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -568,7 +568,8 @@ call_encode(struct rpc_task *task) rpc_exit(task, -EIO); return; } - if (encode && (status = encode(req, p, task->tk_msg.rpc_argp)) < 0) { + if (encode && (status = rpcauth_wrap_req(task, encode, req, p, + task->tk_msg.rpc_argp)) < 0) { printk(KERN_WARNING "%s: can't encode arguments: %d\n", clnt->cl_protname, -status); rpc_exit(task, status); @@ -827,7 +828,8 @@ call_decode(struct rpc_task *task) task->tk_action = NULL; if (decode) - task->tk_status = decode(req, p, 
task->tk_msg.rpc_resp); + task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, + task->tk_msg.rpc_resp); dprintk("RPC: %4d call_decode result %d\n", task->tk_pid, task->tk_status); return; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index ff8d2bb7bb18..f6bde71024e5 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -126,6 +126,9 @@ EXPORT_SYMBOL(xdr_inline_pages); EXPORT_SYMBOL(xdr_shift_buf); EXPORT_SYMBOL(xdr_write_pages); EXPORT_SYMBOL(xdr_read_pages); +EXPORT_SYMBOL(xdr_buf_from_iov); +EXPORT_SYMBOL(xdr_buf_subsegment); +EXPORT_SYMBOL(xdr_buf_read_netobj); /* Debugging symbols */ #ifdef RPC_DEBUG diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 078dad8a90e5..00e0082704ba 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -538,7 +538,7 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) * Copies data into an arbitrary memory location from an array of pages * The copy is assumed to be non-overlapping. */ -static void +void _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) { struct page **pgfrom; @@ -731,3 +731,145 @@ xdr_read_pages(struct xdr_stream *xdr, unsigned int len) xdr->p = (uint32_t *)((char *)iov->iov_base + padding); xdr->end = (uint32_t *)((char *)iov->iov_base + iov->iov_len); } + +static struct iovec empty_iov = {.iov_base = NULL, .iov_len = 0}; + +void +xdr_buf_from_iov(struct iovec *iov, struct xdr_buf *buf) +{ + buf->head[0] = *iov; + buf->tail[0] = empty_iov; + buf->page_len = 0; + buf->len = iov->iov_len; +} + +/* Sets subiov to the intersection of iov with the buffer of length len + * starting base bytes after iov. Indicates empty intersection by setting + * length of subiov to zero. Decrements len by length of subiov, sets base + * to zero (or decrements it by length of iov if subiov is empty). 
*/ +static void +iov_subsegment(struct iovec *iov, struct iovec *subiov, int *base, int *len) +{ + if (*base > iov->iov_len) { + subiov->iov_base = NULL; + subiov->iov_len = 0; + *base -= iov->iov_len; + } else { + subiov->iov_base = iov->iov_base + *base; + subiov->iov_len = min(*len, (int)iov->iov_len - *base); + *base = 0; + } + *len -= subiov->iov_len; +} + +/* Sets subbuf to the portion of buf of length len beginning base bytes + * from the start of buf. Returns -1 if base or length are out of bounds. */ +int +xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, + int base, int len) +{ + int i; + + subbuf->len = len; + iov_subsegment(buf->head, subbuf->head, &base, &len); + + if (base < buf->page_len) { + i = (base + buf->page_base) >> PAGE_CACHE_SHIFT; + subbuf->pages = &buf->pages[i]; + subbuf->page_base = (base + buf->page_base) & ~PAGE_CACHE_MASK; + subbuf->page_len = min((int)buf->page_len - base, len); + len -= subbuf->page_len; + base = 0; + } else { + base -= buf->page_len; + subbuf->page_len = 0; + } + + iov_subsegment(buf->tail, subbuf->tail, &base, &len); + if (base || len) + return -1; + return 0; +} + +/* obj is assumed to point to allocated memory of size at least len: */ +static int +read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) +{ + struct xdr_buf subbuf; + int this_len; + int status; + + status = xdr_buf_subsegment(buf, &subbuf, base, len); + if (status) + goto out; + this_len = min(len, (int)subbuf.head[0].iov_len); + memcpy(obj, subbuf.head[0].iov_base, this_len); + len -= this_len; + obj += this_len; + this_len = min(len, (int)subbuf.page_len); + if (this_len) + _copy_from_pages(obj, subbuf.pages, subbuf.page_base, this_len); + len -= this_len; + obj += this_len; + this_len = min(len, (int)subbuf.tail[0].iov_len); + memcpy(obj, subbuf.tail[0].iov_base, this_len); +out: + return status; +} + +static int +read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) +{ + u32 raw; + int status; + 
status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); + if (status) + return status; + *obj = ntohl(raw); + return 0; +} + +/* If the netobj starting offset bytes from the start of xdr_buf is contained + * entirely in the head or the tail, set object to point to it; otherwise + * try to find space for it at the end of the tail, copy it there, and + * set obj to point to it. */ +int +xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset) +{ + u32 tail_offset = buf->head[0].iov_len + buf->page_len; + u32 obj_end_offset; + + if (read_u32_from_xdr_buf(buf, offset, &obj->len)) + goto out; + obj_end_offset = offset + 4 + obj->len; + + if (obj_end_offset <= buf->head[0].iov_len) { + /* The obj is contained entirely in the head: */ + obj->data = buf->head[0].iov_base + offset + 4; + } else if (offset + 4 >= tail_offset) { + if (obj_end_offset - tail_offset + > buf->tail[0].iov_len) + goto out; + /* The obj is contained entirely in the tail: */ + obj->data = buf->tail[0].iov_base + + offset - tail_offset + 4; + } else { + /* use end of tail as storage for obj: + * (We don't copy to the beginning because then we'd have + * to worry about doing a potentially overlapping copy. + * This assumes the object is at most half the length of the + * tail.) */ + if (obj->len > buf->tail[0].iov_len) + goto out; + obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len - + obj->len; + if (read_bytes_from_xdr_buf(buf, offset + 4, + obj->data, obj->len)) + goto out; + + } + return 0; +out: + return -1; +} -- cgit v1.2.3 From cb21a7182f9d73ad544f33f5cdec6d2be9a8cd4f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:45:27 +0100 Subject: RPCSEC_GSS: Move the gss sequence number history from the task structure to the request structure, where it makes more sense. 
In particular, when we start storing more sequence number history (necessary to process responses to resent requests correctly), this will make it easier to initialize the necessary data structure in the right place (in xprt_request_init). --- include/linux/sunrpc/sched.h | 2 -- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/auth_gss/auth_gss.c | 10 +++++----- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 5e2d23e0ce6c..1113d7f3df13 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -48,8 +48,6 @@ struct rpc_task { __u8 tk_garb_retry, tk_cred_retry, tk_suid_retry; - u32 tk_gss_seqno; /* rpcsec_gss sequence number - used on this request */ /* * timeout_fn to be executed by timer bottom half diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e29381edeaea..8472b1c5ad2e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -95,6 +95,7 @@ struct rpc_rqst { struct rpc_rqst * rq_next; /* free list */ int rq_cong; /* has incremented xprt->cong */ int rq_received; /* receive completed */ + u32 rq_seqno; /* gss seq no. used on req. 
*/ struct list_head rq_list; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 1a4b9f504aa2..ef57f1942ea4 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -690,12 +690,12 @@ gss_marshal(struct rpc_task *task, u32 *p, int ruid) goto out_put_ctx; } spin_lock(&ctx->gc_seq_lock); - task->tk_gss_seqno = ctx->gc_seq++; + req->rq_seqno = ctx->gc_seq++; spin_unlock(&ctx->gc_seq_lock); *p++ = htonl((u32) RPC_GSS_VERSION); *p++ = htonl((u32) ctx->gc_proc); - *p++ = htonl((u32) task->tk_gss_seqno); + *p++ = htonl((u32) req->rq_seqno); *p++ = htonl((u32) service); p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); *cred_len = htonl((p - (cred_len + 1)) << 2); @@ -766,7 +766,7 @@ gss_validate(struct rpc_task *task, u32 *p) goto out_bad; if (flav != RPC_AUTH_GSS) goto out_bad; - seq = htonl(task->tk_gss_seqno); + seq = htonl(task->tk_rqstp->rq_seqno); iov.iov_base = &seq; iov.iov_len = sizeof(seq); xdr_buf_from_iov(&iov, &verf_buf); @@ -832,7 +832,7 @@ gss_wrap_req(struct rpc_task *task, integ_len = p++; offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; - *p++ = htonl(task->tk_gss_seqno); + *p++ = htonl(req->rq_seqno); status = encode(rqstp, p, obj); if (status) @@ -909,7 +909,7 @@ gss_unwrap_resp(struct rpc_task *task, mic_offset = integ_len + data_offset; if (mic_offset > rcv_buf->len) goto out; - if (ntohl(*p++) != task->tk_gss_seqno) + if (ntohl(*p++) != req->rq_seqno) goto out; if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, -- cgit v1.2.3 From 8d51075e1a0b96f9b787df3e851fec5c9bb2ba4a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:49:04 +0100 Subject: RPC: Add support for sharing the same RPC transport and credential caches between different mountpoints by allowing cloning of the rpc_client struct. 
--- include/linux/sunrpc/auth.h | 1 + include/linux/sunrpc/clnt.h | 23 ++++++++++------- net/sunrpc/auth.c | 13 ++++++++-- net/sunrpc/clnt.c | 61 ++++++++++++++++++++++++++++++++++++++++++--- net/sunrpc/pmap_clnt.c | 17 +++++++------ net/sunrpc/sunrpc_syms.c | 2 ++ net/sunrpc/xprt.c | 8 +++--- 7 files changed, 98 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index a68f18bf0a46..1f83e0f5b9d3 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -73,6 +73,7 @@ struct rpc_auth { * differ from the flavor in * au_ops->au_flavor in gss * case) */ + atomic_t au_count; /* Reference counter */ /* per-flavor data */ }; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 99d57fec03a9..917ec29d789b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -26,6 +26,8 @@ struct rpc_portmap { __u32 pm_vers; __u32 pm_prot; __u16 pm_port; + unsigned char pm_binding : 1; /* doing a getport() */ + struct rpc_wait_queue pm_bindwait; /* waiting on getport() */ }; struct rpc_inode; @@ -34,6 +36,7 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { + atomic_t cl_count; /* Number of clones */ atomic_t cl_users; /* number of references */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ @@ -48,26 +51,27 @@ struct rpc_clnt { cl_intr : 1,/* interruptible */ cl_chatty : 1,/* be verbose */ cl_autobind : 1,/* use getport() */ - cl_binding : 1,/* doing a getport() */ cl_droppriv : 1,/* enable NFS suid hack */ cl_oneshot : 1,/* dispose after use */ cl_dead : 1;/* abandoned */ - struct rpc_rtt cl_rtt; /* RTO estimator data */ - - struct rpc_portmap cl_pmap; /* port mapping */ - struct rpc_wait_queue cl_bindwait; /* waiting on getport() */ + struct rpc_rtt * cl_rtt; /* RTO estimator data */ + struct rpc_portmap * cl_pmap; /* port mapping */ int cl_nodelen; /* nodename length 
*/ char cl_nodename[UNX_MAXNODENAME]; char cl_pathname[30];/* Path in rpc_pipe_fs */ struct dentry * cl_dentry; /* inode */ + struct rpc_clnt * cl_parent; /* Points to parent of clones */ + struct rpc_rtt cl_rtt_default; + struct rpc_portmap cl_pmap_default; + char cl_inline_name[32]; }; #define cl_timeout cl_xprt->timeout -#define cl_prog cl_pmap.pm_prog -#define cl_vers cl_pmap.pm_vers -#define cl_port cl_pmap.pm_port -#define cl_prot cl_pmap.pm_prot +#define cl_prog cl_pmap->pm_prog +#define cl_vers cl_pmap->pm_vers +#define cl_port cl_pmap->pm_port +#define cl_prot cl_pmap->pm_prot /* * General RPC program info @@ -108,6 +112,7 @@ struct rpc_procinfo { struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *info, u32 version, rpc_authflavor_t authflavor); +struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a89b30cadff3..35900eb52b61 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -61,6 +61,7 @@ rpcauth_unregister(struct rpc_authops *ops) struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { + struct rpc_auth *auth; struct rpc_authops *ops; u32 flavor = pseudoflavor_to_flavor(pseudoflavor); @@ -68,13 +69,21 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) return NULL; if (!try_module_get(ops->owner)) return NULL; - clnt->cl_auth = ops->create(clnt, pseudoflavor); - return clnt->cl_auth; + auth = ops->create(clnt, pseudoflavor); + if (!auth) + return NULL; + atomic_set(&auth->au_count, 1); + if (clnt->cl_auth) + rpcauth_destroy(clnt->cl_auth); + clnt->cl_auth = auth; + return auth; } void rpcauth_destroy(struct rpc_auth *auth) { + if (!atomic_dec_and_test(&auth->au_count)) + return; auth->au_ops->destroy(auth); module_put(auth->au_ops->owner); kfree(auth); diff --git 
a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cc4bfb201807..6c6a8310000a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -102,6 +102,7 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname, { struct rpc_version *version; struct rpc_clnt *clnt = NULL; + int len; dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); @@ -116,23 +117,37 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname, goto out_no_clnt; memset(clnt, 0, sizeof(*clnt)); atomic_set(&clnt->cl_users, 0); + atomic_set(&clnt->cl_count, 1); + clnt->cl_parent = clnt; + + clnt->cl_server = clnt->cl_inline_name; + len = strlen(servname) + 1; + if (len > sizeof(clnt->cl_inline_name)) { + char *buf = kmalloc(len, GFP_KERNEL); + if (buf != 0) + clnt->cl_server = buf; + else + len = sizeof(clnt->cl_inline_name); + } + strlcpy(clnt->cl_server, servname, len); clnt->cl_xprt = xprt; clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; - clnt->cl_server = servname; clnt->cl_protname = program->name; + clnt->cl_pmap = &clnt->cl_pmap_default; clnt->cl_port = xprt->addr.sin_port; clnt->cl_prog = program->number; clnt->cl_vers = version->number; clnt->cl_prot = xprt->prot; clnt->cl_stats = program->stats; - INIT_RPC_WAITQ(&clnt->cl_bindwait, "bindwait"); + INIT_RPC_WAITQ(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); if (!clnt->cl_port) clnt->cl_autobind = 1; - rpc_init_rtt(&clnt->cl_rtt, xprt->timeout.to_initval); + clnt->cl_rtt = &clnt->cl_rtt_default; + rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); if (rpc_setup_pipedir(clnt, program->pipe_dir_name) < 0) goto out_no_path; @@ -157,11 +172,39 @@ out_no_clnt: out_no_auth: rpc_rmdir(clnt->cl_pathname); out_no_path: + if (clnt->cl_server != clnt->cl_inline_name) + kfree(clnt->cl_server); kfree(clnt); clnt = NULL; goto out; } +/* + * This function clones the RPC client structure. 
It allows us to share the + * same transport while varying parameters such as the authentication + * flavour. + */ +struct rpc_clnt * +rpc_clone_client(struct rpc_clnt *clnt) +{ + struct rpc_clnt *new; + + new = (struct rpc_clnt *)kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + goto out_no_clnt; + memcpy(new, clnt, sizeof(*new)); + atomic_set(&new->cl_count, 1); + atomic_set(&new->cl_users, 0); + atomic_inc(&new->cl_parent->cl_count); + if (new->cl_auth) + atomic_inc(&new->cl_auth->au_count); +out: + return new; +out_no_clnt: + printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); + goto out; +} + /* * Properly shut down an RPC client, terminating all outstanding * requests. Note that we must be certain that cl_oneshot and @@ -201,19 +244,29 @@ rpc_shutdown_client(struct rpc_clnt *clnt) int rpc_destroy_client(struct rpc_clnt *clnt) { + if (!atomic_dec_and_test(&clnt->cl_count)) + return 1; + BUG_ON(atomic_read(&clnt->cl_users) != 0); + dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (clnt->cl_auth) { rpcauth_destroy(clnt->cl_auth); clnt->cl_auth = NULL; } + if (clnt->cl_parent != clnt) { + rpc_destroy_client(clnt->cl_parent); + goto out_free; + } if (clnt->cl_pathname[0]) rpc_rmdir(clnt->cl_pathname); if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; } + if (clnt->cl_server != clnt->cl_inline_name) + kfree(clnt->cl_server); +out_free: kfree(clnt); return 0; } diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index ed627ea885e8..4b619e002123 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -41,7 +41,7 @@ static spinlock_t pmap_lock = SPIN_LOCK_UNLOCKED; void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) { - struct rpc_portmap *map = &clnt->cl_pmap; + struct rpc_portmap *map = clnt->cl_pmap; struct sockaddr_in *sap = &clnt->cl_xprt->addr; struct rpc_message msg = { .rpc_proc = &pmap_procedures[PMAP_GETPORT], @@ -57,12 +57,12 @@ rpc_getport(struct rpc_task 
*task, struct rpc_clnt *clnt) map->pm_prog, map->pm_vers, map->pm_prot); spin_lock(&pmap_lock); - if (clnt->cl_binding) { - rpc_sleep_on(&clnt->cl_bindwait, task, NULL, 0); + if (map->pm_binding) { + rpc_sleep_on(&map->pm_bindwait, task, NULL, 0); spin_unlock(&pmap_lock); return; } - clnt->cl_binding = 1; + map->pm_binding = 1; spin_unlock(&pmap_lock); task->tk_status = -EACCES; /* why set this? returns -EIO below */ @@ -85,8 +85,8 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) bailout: spin_lock(&pmap_lock); - clnt->cl_binding = 0; - rpc_wake_up(&clnt->cl_bindwait); + map->pm_binding = 0; + rpc_wake_up(&map->pm_bindwait); spin_unlock(&pmap_lock); task->tk_status = -EIO; task->tk_action = NULL; @@ -129,6 +129,7 @@ static void pmap_getport_done(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_portmap *map = clnt->cl_pmap; dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", task->tk_pid, task->tk_status, clnt->cl_port); @@ -145,8 +146,8 @@ pmap_getport_done(struct rpc_task *task) clnt->cl_xprt->addr.sin_port = clnt->cl_port; } spin_lock(&pmap_lock); - clnt->cl_binding = 0; - rpc_wake_up(&clnt->cl_bindwait); + map->pm_binding = 0; + rpc_wake_up(&map->pm_bindwait); spin_unlock(&pmap_lock); } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f6bde71024e5..dd8409baa09e 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -41,6 +41,7 @@ EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ EXPORT_SYMBOL(rpc_create_client); +EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); EXPORT_SYMBOL(rpc_release_client); @@ -66,6 +67,7 @@ EXPORT_SYMBOL(xprt_set_timeout); /* Client credential cache */ EXPORT_SYMBOL(rpcauth_register); EXPORT_SYMBOL(rpcauth_unregister); +EXPORT_SYMBOL(rpcauth_create); EXPORT_SYMBOL(rpcauth_lookupcred); EXPORT_SYMBOL(rpcauth_lookup_credcache); EXPORT_SYMBOL(rpcauth_free_credcache); diff --git 
a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ef616fe9c561..e6c5f7ab7968 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -584,9 +584,9 @@ xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) __xprt_put_cong(xprt, req); if (timer) { if (req->rq_ntrans == 1) - rpc_update_rtt(&clnt->cl_rtt, timer, + rpc_update_rtt(clnt->cl_rtt, timer, (long)jiffies - req->rq_xtime); - rpc_set_timeo(&clnt->cl_rtt, timer, req->rq_ntrans - 1); + rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); } } @@ -1224,8 +1224,8 @@ xprt_transmit(struct rpc_task *task) spin_lock_bh(&xprt->sock_lock); if (!xprt->nocong) { int timer = task->tk_msg.rpc_proc->p_timer; - task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt, timer); - task->tk_timeout <<= rpc_ntimeo(&clnt->cl_rtt, timer); + task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); + task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer); task->tk_timeout <<= clnt->cl_timeout.to_retries - req->rq_timeout.to_retries; if (task->tk_timeout > req->rq_timeout.to_maxval) -- cgit v1.2.3 From c2f2ea78e058ca74fe2e674fd8cdfb71966b248c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:50:51 +0100 Subject: NFSv4/RPCSEC_GSS: Make Frank's server->client_sys feature use RPC cloning in order to avoid duplicating sockets etc. Make NFSv4 share a single socket for all communication to the same server. 
--- fs/nfs/inode.c | 67 +++++++++++++++++++++++++++++++++++++++----------- fs/nfs/nfs4proc.c | 6 +---- fs/nfs/nfs4state.c | 4 +++ include/linux/nfs_fs.h | 15 +++++------ 4 files changed, 65 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index af022760107a..5cd28cb5076a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -493,10 +493,17 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) server->client = nfs_create_client(server, data); if (server->client == NULL) goto out_fail; - data->pseudoflavor = RPC_AUTH_UNIX; /* RFC 2623, sec 2.3.2 */ - server->client_sys = nfs_create_client(server, data); - if (server->client_sys == NULL) - goto out_shutdown; + /* RFC 2623, sec 2.3.2 */ + if (authflavor != RPC_AUTH_UNIX) { + server->client_sys = rpc_clone_client(server->client); + if (server->client_sys == NULL) + goto out_shutdown; + if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) + goto out_shutdown; + } else { + atomic_inc(&server->client->cl_count); + server->client_sys = server->client; + } /* Fire up rpciod if not yet running */ if (rpciod_up() != 0) { @@ -1349,6 +1356,7 @@ static struct file_system_type nfs_fs_type = { static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) { struct nfs_server *server; + struct nfs4_client *clp = NULL; struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; struct rpc_timeout timeparms; @@ -1398,13 +1406,13 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, return -EINVAL; } - /* Now create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (xprt == NULL) { - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + clp = nfs4_get_client(&server->addr.sin_addr); + if (!clp) { + printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); goto out_fail; } + /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; 
if (data->auth_flavourlen != 0) { if (data->auth_flavourlen > 1) @@ -1414,34 +1422,61 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, goto out_fail; } } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, authflavour); + + down_write(&clp->cl_sem); + if (clp->cl_rpcclient == NULL) { + xprt = xprt_create_proto(proto, &server->addr, &timeparms); + if (xprt == NULL) { + up_write(&clp->cl_sem); + printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + goto out_fail; + } + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + server->rpc_ops->version, authflavour); + if (clnt == NULL) { + up_write(&clp->cl_sem); + printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + xprt_destroy(xprt); + goto out_fail; + } + clnt->cl_chatty = 1; + clp->cl_rpcclient = clnt; + clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); + memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); + } + clnt = rpc_clone_client(clp->cl_rpcclient); + server->nfs4_state = clp; + up_write(&clp->cl_sem); + if (clnt == NULL) { printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); goto out_fail; } clnt->cl_intr = (server->flags & NFS4_MOUNT_INTR) ? 1 : 0; clnt->cl_softrtry = (server->flags & NFS4_MOUNT_SOFT) ? 
1 : 0; - clnt->cl_chatty = 1; server->client = clnt; + if (clnt->cl_auth->au_flavor != authflavour) { + if (rpcauth_create(authflavour, clnt) == NULL) { + printk(KERN_WARNING "NFS: couldn't create credcache!\n"); + goto out_shutdown; + } + } + /* Fire up rpciod if not yet running */ if (rpciod_up() != 0) { printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); goto out_shutdown; } - if (create_nfsv4_state(server, data)) - goto out_shutdown; - if ((server->idmap = nfs_idmap_new(server)) == NULL) printk(KERN_WARNING "NFS: couldn't start IDmap\n"); err = nfs_sb_init(sb, authflavour); if (err == 0) return 0; + clp = NULL; rpciod_down(); destroy_nfsv4_state(server); if (server->idmap != NULL) @@ -1449,6 +1484,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, out_shutdown: rpc_shutdown_client(server->client); out_fail: + if (clp) + nfs4_put_client(clp); return err; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f447a7a05449..2d6850d65dd3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -762,9 +762,7 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct qstr q; int status; - clp = server->nfs4_state = nfs4_get_client(&server->addr.sin_addr); - if (!clp) - return -ENOMEM; + clp = server->nfs4_state; down_write(&clp->cl_sem); /* Has the clientid already been initialized? 
*/ @@ -850,8 +848,6 @@ no_setclientid: return status; out_unlock: up_write(&clp->cl_sem); - nfs4_put_client(clp); - server->nfs4_state = NULL; return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 42a2344a2893..960899db68e2 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -93,6 +93,10 @@ nfs4_free_client(struct nfs4_client *clp) kfree(sp); } BUG_ON(!list_empty(&clp->cl_state_owners)); + if (clp->cl_cred) + put_rpccred(clp->cl_cred); + if (clp->cl_rpcclient) + rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5ae592b26d63..1ba074302796 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -489,6 +489,14 @@ struct nfs4_client { int cl_nunused; spinlock_t cl_lock; atomic_t cl_count; + + struct rpc_clnt * cl_rpcclient; + struct rpc_cred * cl_cred; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; }; /* @@ -558,13 +566,6 @@ extern void nfs4_increment_seqid(u32 status, struct nfs4_state_owner *sp); struct nfs4_mount_data; -static inline int -create_nfsv4_state(struct nfs_server *server, struct nfs4_mount_data *data) -{ - server->nfs4_state = NULL; - return 0; -} - static inline void destroy_nfsv4_state(struct nfs_server *server) { -- cgit v1.2.3 From acac57debb7af23d6178e319c1017b24ec847ded Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:52:03 +0100 Subject: NFSv4: Convert the RENEW operation from using nfs4_compound, to being a standalone RPC call in preparation for the renew daemon overhaul. 
--- fs/nfs/nfs4proc.c | 59 +++++++++----------------------------------------- fs/nfs/nfs4renewd.c | 2 +- fs/nfs/nfs4xdr.c | 46 ++++++++++++++++++++++++++++++++++----- include/linux/nfs4.h | 1 + include/linux/nfs_fs.h | 2 +- 5 files changed, 53 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2d6850d65dd3..e7327e3bf484 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -428,18 +428,6 @@ nfs4_setup_rename(struct nfs4_compound *cp, struct qstr *old, struct qstr *new, cp->req_nops++; } -static void -nfs4_setup_renew(struct nfs4_compound *cp) -{ - struct nfs4_client **client_state = GET_OP(cp, renew); - - *client_state = cp->server->nfs4_state; - - OPNUM(cp) = OP_RENEW; - cp->req_nops++; - cp->renew_index = cp->req_nops; -} - static void nfs4_setup_restorefh(struct nfs4_compound *cp) { @@ -1648,55 +1636,28 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how) } /* - * nfs4_proc_renew(): This is not one of the nfs_rpc_ops; it is a special + * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special * standalone procedure for queueing an asynchronous RENEW. 
*/ -struct renew_desc { - struct rpc_task task; - struct nfs4_compound compound; - struct nfs4_op ops[1]; -}; - static void renew_done(struct rpc_task *task) { - struct nfs4_compound *cp = (struct nfs4_compound *) task->tk_msg.rpc_argp; - process_lease(cp); -} - -static void -renew_release(struct rpc_task *task) -{ - kfree(task->tk_calldata); + struct nfs_server *server = (struct nfs_server *)task->tk_msg.rpc_resp; + unsigned long timestamp = (unsigned long)task->tk_calldata; + renew_lease(server, timestamp); } int -nfs4_proc_renew(struct nfs_server *server) +nfs4_proc_async_renew(struct nfs_server *server, struct rpc_cred *cred) { - struct renew_desc *rp; - struct rpc_task *task; - struct nfs4_compound *cp; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMPOUND], + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = server->nfs4_state, + .rpc_resp = server, + .rpc_cred = cred, }; - rp = (struct renew_desc *) kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) - return -ENOMEM; - cp = &rp->compound; - task = &rp->task; - - nfs4_setup_compound(cp, rp->ops, server, "renew"); - nfs4_setup_renew(cp); - - msg.rpc_argp = cp; - msg.rpc_resp = cp; - rpc_init_task(task, server->client, renew_done, RPC_TASK_ASYNC); - rpc_call_setup(task, &msg, 0); - task->tk_calldata = rp; - task->tk_release = renew_release; - - return rpc_execute(task); + return rpc_call_async(server->client, &msg, 0, renew_done, (void *)jiffies); } /* diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 4ba871885dbc..92176b0b0a9b 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -70,7 +70,7 @@ renewd(struct rpc_task *task) timeout = (2 * lease) / 3 + last - jiffies; else { /* Queue an asynchronous RENEW. 
*/ - nfs4_proc_renew(server); + nfs4_proc_async_renew(server, NULL); timeout = (2 * lease) / 3; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 35a28e9e7d21..add338c9be56 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -90,6 +90,8 @@ extern int nfs_stat_to_errno(int); #define decode_pre_write_getattr_maxsz op_decode_hdr_maxsz + 5 #define encode_post_write_getattr_maxsz op_encode_hdr_maxsz + 2 #define decode_post_write_getattr_maxsz op_decode_hdr_maxsz + 13 +#define encode_renew_maxsz op_encode_hdr_maxsz + 3 +#define decode_renew_maxsz op_decode_hdr_maxsz #define NFS4_enc_compound_sz 1024 /* XXX: large enough? */ #define NFS4_dec_compound_sz 1024 /* XXX: large enough? */ @@ -159,6 +161,10 @@ extern int nfs_stat_to_errno(int); #define NFS4_dec_setattr_sz compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + 3 +#define NFS4_enc_renew_sz compound_encode_hdr_maxsz + \ + encode_renew_maxsz +#define NFS4_dec_renew_sz compound_decode_hdr_maxsz + \ + decode_renew_maxsz static struct { @@ -889,9 +895,6 @@ encode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqs case OP_RENAME: status = encode_rename(xdr, &cp->ops[i].u.rename); break; - case OP_RENEW: - status = encode_renew(xdr, cp->ops[i].u.renew); - break; case OP_RESTOREFH: status = encode_restorefh(xdr); break; @@ -1131,6 +1134,22 @@ out: return status; } +/* + * a RENEW request + */ +static int +nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + return encode_renew(&xdr, clp); +} + /* * START OF "GENERIC" DECODE ROUTINES. 
* These may look a little ugly since they are imported from a "generic" @@ -2137,9 +2156,6 @@ decode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqs case OP_RENAME: status = decode_rename(xdr, &op->u.rename); break; - case OP_RENEW: - status = decode_renew(xdr); - break; case OP_SAVEFH: status = decode_savefh(xdr); break; @@ -2387,6 +2403,23 @@ out: return status; } +/* + * Decode RENEW response + */ +static int +nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_renew(&xdr); + return status; +} + uint32_t * nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { @@ -2443,6 +2476,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), PROC(CLOSE, enc_close, dec_close), PROC(SETATTR, enc_setattr, dec_setattr), + PROC(RENEW, enc_renew, dec_renew), }; struct rpc_version nfs_version4 = { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8bb512eb2b43..1598d1b3c739 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -221,6 +221,7 @@ enum { NFSPROC4_CLNT_OPEN_CONFIRM, NFSPROC4_CLNT_CLOSE, NFSPROC4_CLNT_SETATTR, + NFSPROC4_CLNT_RENEW, }; #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1ba074302796..a8dc94ea12d3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -545,7 +545,7 @@ struct nfs4_state { /* nfs4proc.c */ -extern int nfs4_proc_renew(struct nfs_server *server); +extern int nfs4_proc_async_renew(struct nfs_server *server, struct rpc_cred *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); /* nfs4renewd.c */ -- cgit v1.2.3 From 7cf3d8b799b5a018b89bc7ce553d66c12aa50ade Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:53:15 +0100 Subject: NFSv4: Convert the lease renewal 
daemon from being per-mountpoint to being per-server. Instead of running it on top of rpciod, convert it to use keventd. This means we can use the struct nfs4_client semaphores for ordering purposes. --- fs/nfs/inode.c | 17 ++++++-- fs/nfs/nfs4proc.c | 41 ++++++++++-------- fs/nfs/nfs4renewd.c | 108 +++++++++++++++++++++++++++++++--------------- fs/nfs/nfs4state.c | 26 +++++++++++ include/linux/nfs_fs.h | 35 +++++++-------- include/linux/nfs_fs_sb.h | 5 ++- 6 files changed, 154 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5cd28cb5076a..2c1df6550feb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -163,6 +163,8 @@ nfs_put_super(struct super_block *sb) nfs_idmap_delete(server); #endif /* CONFIG_NFS_V4 */ + nfs4_renewd_prepare_shutdown(server); + if (server->client != NULL) rpc_shutdown_client(server->client); if (server->client_sys != NULL) @@ -1281,6 +1283,8 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, if (!server) return ERR_PTR(-ENOMEM); memset(server, 0, sizeof(struct nfs_server)); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); root = &server->fh; memcpy(root, &data->root, sizeof(*root)); @@ -1444,13 +1448,15 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); } + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); server->nfs4_state = clp; up_write(&clp->cl_sem); + clp = NULL; if (clnt == NULL) { printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - goto out_fail; + goto out_remove_list; } clnt->cl_intr = (server->flags & NFS4_MOUNT_INTR) ? 
1 : 0; @@ -1476,13 +1482,16 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, err = nfs_sb_init(sb, authflavour); if (err == 0) return 0; - clp = NULL; rpciod_down(); - destroy_nfsv4_state(server); if (server->idmap != NULL) nfs_idmap_delete(server); out_shutdown: rpc_shutdown_client(server->client); +out_remove_list: + down_write(&server->nfs4_state->cl_sem); + list_del_init(&server->nfs4_siblings); + up_write(&server->nfs4_state->cl_sem); + destroy_nfsv4_state(server); out_fail: if (clp) nfs4_put_client(clp); @@ -1542,6 +1551,8 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, if (!server) return ERR_PTR(-ENOMEM); memset(server, 0, sizeof(struct nfs_server)); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); if (data->version != NFS4_MOUNT_VERSION) { printk("nfs warning: mount version %s than kernel\n", diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e7327e3bf484..a3ac8370ba3c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -56,8 +56,6 @@ extern struct rpc_procinfo nfs4_procedures[]; extern nfs4_stateid zero_stateid; -static spinlock_t renew_lock = SPIN_LOCK_UNLOCKED; - static void nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops, struct nfs_server *server, char *tag) @@ -480,10 +478,11 @@ nfs4_setup_setclientid_confirm(struct nfs4_compound *cp) static void renew_lease(struct nfs_server *server, unsigned long timestamp) { - spin_lock(&renew_lock); - if (time_before(server->last_renewal,timestamp)) - server->last_renewal = timestamp; - spin_unlock(&renew_lock); + struct nfs4_client *clp = server->nfs4_state; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); } static inline void @@ -748,6 +747,7 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo fsinfo; unsigned char * p; struct qstr q; + unsigned long last_renewed; int 
status; clp = server->nfs4_state; @@ -773,6 +773,7 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ nfs4_setup_compound(&compound, ops, server, "setclientid"); nfs4_setup_setclientid(&compound, 0, 0); + last_renewed = jiffies; if ((status = nfs4_call_compound(&compound, NULL, 0))) goto out_unlock; @@ -786,20 +787,22 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, nfs4_setup_putrootfh(&compound); nfs4_setup_getrootattr(&compound, fattr, &fsinfo); nfs4_setup_getfh(&compound, fhandle); + last_renewed = jiffies; if ((status = nfs4_call_compound(&compound, NULL, 0))) goto out_unlock; - clp->cl_state = NFS4CLNT_OK; -no_setclientid: /* * Now that we have instantiated the clientid and determined * the lease time, we can initialize the renew daemon for this * server. * FIXME: we only need one renewd daemon per server. */ - server->lease_time = fsinfo.lease_time * HZ; - if ((status = nfs4_init_renewd(server))) - goto out_unlock; + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = last_renewed; + nfs4_schedule_state_renewal(clp); + clp->cl_state = NFS4CLNT_OK; + +no_setclientid: up_write(&clp->cl_sem); /* @@ -1642,22 +1645,24 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how) static void renew_done(struct rpc_task *task) { - struct nfs_server *server = (struct nfs_server *)task->tk_msg.rpc_resp; + struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; unsigned long timestamp = (unsigned long)task->tk_calldata; - renew_lease(server, timestamp); + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); } int -nfs4_proc_async_renew(struct nfs_server *server, struct rpc_cred *cred) +nfs4_proc_async_renew(struct nfs4_client *clp) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], - .rpc_argp = server->nfs4_state, - .rpc_resp = server, - .rpc_cred = 
cred, + .rpc_argp = clp, + .rpc_cred = clp->cl_cred, }; - return rpc_call_async(server->client, &msg, 0, renew_done, (void *)jiffies); + return rpc_call_async(clp->cl_rpcclient, &msg, 0, renew_done, (void *)jiffies); } /* diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 92176b0b0a9b..667e06f1c647 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -54,53 +54,91 @@ #include #include -static RPC_WAITQ(nfs4_renewd_queue, "nfs4_renewd_queue"); +#define NFSDBG_FACILITY NFSDBG_PROC -static void -renewd(struct rpc_task *task) +void +nfs4_renew_state(void *data) { - struct nfs_server *server = (struct nfs_server *)task->tk_calldata; - unsigned long lease = server->lease_time; - unsigned long last = server->last_renewal; - unsigned long timeout; + struct nfs4_client *clp = (struct nfs4_client *)data; + long lease, timeout; + unsigned long last, now; - if (!server->nfs4_state) - timeout = (2 * lease) / 3; - else if (jiffies < last + lease/3) - timeout = (2 * lease) / 3 + last - jiffies; - else { + down_read(&clp->cl_sem); + dprintk("%s: start\n", __FUNCTION__); + /* Are there any active superblocks? */ + if (list_empty(&clp->cl_superblocks)) + goto out; + spin_lock(&clp->cl_lock); + lease = clp->cl_lease_time; + last = clp->cl_last_renewal; + now = jiffies; + timeout = (2 * lease) / 3 + (long)last - (long)now; + /* Are we close to a lease timeout? */ + if (time_after(now, last + lease/3)) { + spin_unlock(&clp->cl_lock); /* Queue an asynchronous RENEW. */ - nfs4_proc_async_renew(server, NULL); + nfs4_proc_async_renew(clp); timeout = (2 * lease) / 3; - } - + spin_lock(&clp->cl_lock); + } else + dprintk("%s: failed to call renewd. Reason: lease not expired \n", + __FUNCTION__); if (timeout < 5 * HZ) /* safeguard */ timeout = 5 * HZ; - task->tk_timeout = timeout; - task->tk_action = renewd; - task->tk_exit = NULL; - rpc_sleep_on(&nfs4_renewd_queue, task, NULL, NULL); - return; + dprintk("%s: requeueing work. 
Lease period = %ld\n", + __FUNCTION__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); +out: + up_read(&clp->cl_sem); + dprintk("%s: done\n", __FUNCTION__); } -int -nfs4_init_renewd(struct nfs_server *server) +/* Must be called with clp->cl_sem locked for writes */ +void +nfs4_schedule_state_renewal(struct nfs4_client *clp) { - struct rpc_task *task; - int status; + long timeout; - lock_kernel(); - status = -ENOMEM; - task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC); - if (!task) - goto out; - task->tk_calldata = server; - task->tk_action = renewd; - status = rpc_execute(task); + spin_lock(&clp->cl_lock); + timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal + - (long)jiffies; + if (timeout < 5 * HZ) + timeout = 5 * HZ; + dprintk("%s: requeueing work. Lease period = %ld\n", + __FUNCTION__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); +} -out: - unlock_kernel(); - return status; +void +nfs4_renewd_prepare_shutdown(struct nfs_server *server) +{ + struct nfs4_client *clp = server->nfs4_state; + + if (!clp) + return; + flush_scheduled_work(); + down_write(&clp->cl_sem); + if (!list_empty(&server->nfs4_siblings)) + list_del_init(&server->nfs4_siblings); + up_write(&clp->cl_sem); +} + +/* Must be called with clp->cl_sem locked for writes */ +void +nfs4_kill_renewd(struct nfs4_client *clp) +{ + down_read(&clp->cl_sem); + if (!list_empty(&clp->cl_superblocks)) { + up_read(&clp->cl_sem); + return; + } + cancel_delayed_work(&clp->cl_renewd); + up_read(&clp->cl_sem); + flush_scheduled_work(); } /* diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 960899db68e2..bded88b8d3eb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -41,6 +41,7 @@ #include #include #include +#include #define OPENOWNER_POOL_SIZE 8 @@ -55,6 +56,28 @@ nfs4_stateid 
one_stateid = static LIST_HEAD(nfs4_clientid_list); +extern void nfs4_renew_state(void *); + +void +init_nfsv4_state(struct nfs_server *server) +{ + server->nfs4_state = NULL; + INIT_LIST_HEAD(&server->nfs4_siblings); +} + +void +destroy_nfsv4_state(struct nfs_server *server) +{ + if (server->mnt_path) { + kfree(server->mnt_path); + server->mnt_path = NULL; + } + if (server->nfs4_state) { + nfs4_put_client(server->nfs4_state); + server->nfs4_state = NULL; + } +} + /* * nfs4_get_client(): returns an empty client structure * nfs4_put_client(): drops reference to client structure @@ -75,6 +98,8 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); atomic_set(&clp->cl_count, 1); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + INIT_LIST_HEAD(&clp->cl_superblocks); clp->cl_state = NFS4CLNT_NEW; } return clp; @@ -130,6 +155,7 @@ nfs4_put_client(struct nfs4_client *clp) return; list_del(&clp->cl_servers); spin_unlock(&state_spinlock); + nfs4_kill_renewd(clp); nfs4_free_client(clp); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a8dc94ea12d3..4996221041a4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -28,6 +28,7 @@ #include #include #include +#include /* * Enable debugging support for nfs client. @@ -493,6 +494,12 @@ struct nfs4_client { struct rpc_clnt * cl_rpcclient; struct rpc_cred * cl_cred; + struct list_head cl_superblocks; /* List of nfs_server structs */ + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + /* Our own IP address, as a null-terminated string. * This is used to generate the clientid, and the callback address. 
*/ @@ -545,13 +552,17 @@ struct nfs4_state { /* nfs4proc.c */ -extern int nfs4_proc_async_renew(struct nfs_server *server, struct rpc_cred *); +extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); /* nfs4renewd.c */ -extern int nfs4_init_renewd(struct nfs_server *server); +extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs4_client *); /* nfs4state.c */ +extern void init_nfsv4_state(struct nfs_server *); +extern void destroy_nfsv4_state(struct nfs_server *); extern struct nfs4_client *nfs4_get_client(struct in_addr *); extern void nfs4_put_client(struct nfs4_client *clp); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); @@ -560,29 +571,13 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_increment_seqid(u32 status, struct nfs4_state_owner *sp); - - - - - struct nfs4_mount_data; -static inline void -destroy_nfsv4_state(struct nfs_server *server) -{ - if (server->mnt_path) { - kfree(server->mnt_path); - server->mnt_path = NULL; - } - if (server->nfs4_state) { - nfs4_put_client(server->nfs4_state); - server->nfs4_state = NULL; - } -} #else -#define create_nfsv4_state(server, data) 0 +#define init_nfsv4_state(server) do { } while (0) #define destroy_nfsv4_state(server) do { } while (0) #define nfs4_put_state_owner(inode, owner) do { } while (0) #define nfs4_put_open_state(state) do { } while (0) +#define nfs4_renewd_prepare_shutdown(server) do { } while (0) #endif #endif /* __KERNEL__ */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 20ceb626cb3b..5f0b0ce3aa2c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -35,8 +35,9 @@ struct nfs_server { char ip_addr[16]; char * mnt_path; struct 
nfs4_client * nfs4_state; /* all NFSv4 state starts here */ - unsigned long lease_time; /* in jiffies */ - unsigned long last_renewal; /* in jiffies */ + struct list_head nfs4_siblings; /* List of other nfs_server structs + * that share the same clientid + */ void *idmap; #endif }; -- cgit v1.2.3 From f414757e25c8122dcf6ac1b546f67b6317e161b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:54:20 +0100 Subject: NFSv4: Split out the code for retrieving static server information out of the GETATTR compound. --- fs/nfs/nfs4proc.c | 73 +++++----------------- fs/nfs/nfs4xdr.c | 160 ++++++++++++++++++++++++++++++++++++++---------- include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 1 - 4 files changed, 144 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a3ac8370ba3c..264ef919baee 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -56,6 +56,9 @@ extern struct rpc_procinfo nfs4_procedures[]; extern nfs4_stateid zero_stateid; +static int nfs4_proc_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + + static void nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops, struct nfs_server *server, char *tag) @@ -177,44 +180,16 @@ u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_fsinfo_bitmap[2] = { - FATTR4_WORD0_MAXFILESIZE - | FATTR4_WORD0_MAXREAD - | FATTR4_WORD0_MAXWRITE - | FATTR4_WORD0_LEASE_TIME, - 0 -}; - u32 nfs4_pathconf_bitmap[2] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 }; -/* mount bitmap: fattr bitmap + lease time */ -u32 nfs4_mount_bitmap[2] = { - FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE - | FATTR4_WORD0_SIZE - | FATTR4_WORD0_FSID - | FATTR4_WORD0_FILEID - | FATTR4_WORD0_LEASE_TIME, - FATTR4_WORD1_MODE - | FATTR4_WORD1_NUMLINKS - | FATTR4_WORD1_OWNER - | FATTR4_WORD1_OWNER_GROUP - | FATTR4_WORD1_RAWDEV - | FATTR4_WORD1_SPACE_USED - | FATTR4_WORD1_TIME_ACCESS - | FATTR4_WORD1_TIME_METADATA - | 
FATTR4_WORD1_TIME_MODIFY -}; - static inline void __nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap, struct nfs_fattr *fattr, struct nfs_fsstat *fsstat, - struct nfs_fsinfo *fsinfo, struct nfs_pathconf *pathconf) { struct nfs4_getattr *getattr = GET_OP(cp, getattr); @@ -222,7 +197,6 @@ __nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap, getattr->gt_bmval = bitmap; getattr->gt_attrs = fattr; getattr->gt_fsstat = fsstat; - getattr->gt_fsinfo = fsinfo; getattr->gt_pathconf = pathconf; OPNUM(cp) = OP_GETATTR; @@ -234,16 +208,7 @@ nfs4_setup_getattr(struct nfs4_compound *cp, struct nfs_fattr *fattr) { __nfs4_setup_getattr(cp, nfs4_fattr_bitmap, fattr, - NULL, NULL, NULL); -} - -static void -nfs4_setup_getrootattr(struct nfs4_compound *cp, - struct nfs_fattr *fattr, - struct nfs_fsinfo *fsinfo) -{ - __nfs4_setup_getattr(cp, nfs4_mount_bitmap, - fattr, NULL, fsinfo, NULL); + NULL, NULL); } static void @@ -251,15 +216,7 @@ nfs4_setup_statfs(struct nfs4_compound *cp, struct nfs_fsstat *fsstat) { __nfs4_setup_getattr(cp, nfs4_statfs_bitmap, - NULL, fsstat, NULL, NULL); -} - -static void -nfs4_setup_fsinfo(struct nfs4_compound *cp, - struct nfs_fsinfo *fsinfo) -{ - __nfs4_setup_getattr(cp, nfs4_fsinfo_bitmap, - NULL, NULL, fsinfo, NULL); + NULL, fsstat, NULL); } static void @@ -267,7 +224,7 @@ nfs4_setup_pathconf(struct nfs4_compound *cp, struct nfs_pathconf *pathconf) { __nfs4_setup_getattr(cp, nfs4_pathconf_bitmap, - NULL, NULL, NULL, pathconf); + NULL, NULL, pathconf); } static void @@ -759,7 +716,7 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, fattr->valid = 0; nfs4_setup_compound(&compound, ops, server, "getrootfh"); nfs4_setup_putrootfh(&compound); - nfs4_setup_getrootattr(&compound, fattr, &fsinfo); + nfs4_setup_getattr(&compound, fattr); nfs4_setup_getfh(&compound, fhandle); if ((status = nfs4_call_compound(&compound, NULL, 0))) goto out_unlock; @@ -785,7 +742,7 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh 
*fhandle, nfs4_setup_compound(&compound, ops, server, "setclientid_confirm"); nfs4_setup_setclientid_confirm(&compound); nfs4_setup_putrootfh(&compound); - nfs4_setup_getrootattr(&compound, fattr, &fsinfo); + nfs4_setup_getattr(&compound, fattr); nfs4_setup_getfh(&compound, fhandle); last_renewed = jiffies; if ((status = nfs4_call_compound(&compound, NULL, 0))) @@ -797,6 +754,8 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, * server. * FIXME: we only need one renewd daemon per server. */ + if ((status = nfs4_proc_fsinfo(server, fhandle, &fsinfo))) + goto out_unlock; clp->cl_lease_time = fsinfo.lease_time * HZ; clp->cl_last_renewal = last_renewed; nfs4_schedule_state_renewal(clp); @@ -1434,14 +1393,14 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { - struct nfs4_compound compound; - struct nfs4_op ops[2]; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = fsinfo, + }; memset(fsinfo, 0, sizeof(*fsinfo)); - nfs4_setup_compound(&compound, ops, server, "statfs"); - nfs4_setup_putfh(&compound, fhandle); - nfs4_setup_fsinfo(&compound, fsinfo); - return nfs4_call_compound(&compound, NULL, 0); + return rpc_call_sync(server->client, &msg, 0); } static int diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index add338c9be56..ed79661601dd 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -90,6 +90,8 @@ extern int nfs_stat_to_errno(int); #define decode_pre_write_getattr_maxsz op_decode_hdr_maxsz + 5 #define encode_post_write_getattr_maxsz op_encode_hdr_maxsz + 2 #define decode_post_write_getattr_maxsz op_decode_hdr_maxsz + 13 +#define encode_fsinfo_maxsz op_encode_hdr_maxsz + 2 +#define decode_fsinfo_maxsz op_decode_hdr_maxsz + 11 #define encode_renew_maxsz op_encode_hdr_maxsz + 3 #define decode_renew_maxsz op_decode_hdr_maxsz @@ -161,6 +163,12 @@ extern int nfs_stat_to_errno(int); #define NFS4_dec_setattr_sz 
compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + 3 +#define NFS4_enc_fsinfo_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_fsinfo_maxsz +#define NFS4_dec_fsinfo_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_fsinfo_maxsz #define NFS4_enc_renew_sz compound_encode_hdr_maxsz + \ encode_renew_maxsz #define NFS4_dec_renew_sz compound_decode_hdr_maxsz + \ @@ -506,6 +514,15 @@ encode_post_write_getattr(struct xdr_stream *xdr) FATTR4_WORD1_TIME_MODIFY); } +static int +encode_fsinfo(struct xdr_stream *xdr) +{ + return encode_getattr_one(xdr, FATTR4_WORD0_MAXFILESIZE + | FATTR4_WORD0_MAXREAD + | FATTR4_WORD0_MAXWRITE + | FATTR4_WORD0_LEASE_TIME); +} + static int encode_getfh(struct xdr_stream *xdr) { @@ -1134,6 +1151,26 @@ out: return status; } +/* + * FSINFO request + */ +static int +nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, void *fhandle) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, fhandle); + if (!status) + status = encode_fsinfo(&xdr); + return status; +} + /* * a RENEW request */ @@ -1312,7 +1349,6 @@ decode_create(struct xdr_stream *xdr, struct nfs4_create *create) } extern uint32_t nfs4_fattr_bitmap[2]; -extern uint32_t nfs4_fsinfo_bitmap[2]; extern uint32_t nfs4_fsstat_bitmap[2]; extern uint32_t nfs4_pathconf_bitmap[2]; @@ -1322,7 +1358,6 @@ decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, { struct nfs_fattr *nfp = getattr->gt_attrs; struct nfs_fsstat *fsstat = getattr->gt_fsstat; - struct nfs_fsinfo *fsinfo = getattr->gt_fsinfo; struct nfs_pathconf *pathconf = getattr->gt_pathconf; uint32_t attrlen, dummy32, bmlen, bmval0 = 0, @@ -1368,11 +1403,6 @@ decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, nfp->nlink = 1; nfp->timestamp = jiffies; } - if (fsinfo) { - fsinfo->rtmult = fsinfo->wtmult = 
512; /* ??? */ - fsinfo->lease_time = 60; - } - if (bmval0 & FATTR4_WORD0_TYPE) { READ_BUF(4); len += 4; @@ -1406,12 +1436,6 @@ decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, (long long)nfp->fsid_u.nfs4.major, (long long)nfp->fsid_u.nfs4.minor); } - if (bmval0 & FATTR4_WORD0_LEASE_TIME) { - READ_BUF(4); - len += 4; - READ32(fsinfo->lease_time); - dprintk("read_attrs: lease_time=%d\n", fsinfo->lease_time); - } if (bmval0 & FATTR4_WORD0_FILEID) { READ_BUF(8); len += 8; @@ -1436,12 +1460,6 @@ decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, READ64(fsstat->tfiles); dprintk("read_attrs: files_tot=0x%Lx\n", (long long) fsstat->tfiles); } - if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { - READ_BUF(8); - len += 8; - READ64(fsinfo->maxfilesize); - dprintk("read_attrs: maxfilesize=0x%Lx\n", (long long) fsinfo->maxfilesize); - } if (bmval0 & FATTR4_WORD0_MAXLINK) { READ_BUF(4); len += 4; @@ -1454,20 +1472,6 @@ decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, READ32(pathconf->max_namelen); dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen); } - if (bmval0 & FATTR4_WORD0_MAXREAD) { - READ_BUF(8); - len += 8; - READ64(fsinfo->rtmax); - fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax; - dprintk("read_attrs: maxread=%d\n", fsinfo->rtmax); - } - if (bmval0 & FATTR4_WORD0_MAXWRITE) { - READ_BUF(8); - len += 8; - READ64(fsinfo->wtmax); - fsinfo->wtpref = fsinfo->wtmax; - dprintk("read_attrs: maxwrite=%d\n", fsinfo->wtmax); - } if (bmval1 & FATTR4_WORD1_MODE) { READ_BUF(4); @@ -1709,6 +1713,74 @@ out_bad_bitmap: } +static int +decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) +{ + uint32_t *p; + uint32_t len, attrlen, bmlen, bmval0 = 0, bmval1 = 0; + int status; + + status = decode_op_hdr(xdr, OP_GETATTR); + if (status) + return status; + READ_BUF(4); + READ32(bmlen); + if (bmlen < 1) + return -EIO; + READ_BUF(bmlen << 2); + READ32(bmval0); + if (bmval0 & ~(FATTR4_WORD0_MAXFILESIZE|FATTR4_WORD0_MAXREAD| + 
FATTR4_WORD0_MAXWRITE|FATTR4_WORD0_LEASE_TIME)) + goto out_bad_bitmap; + if (bmlen > 1) { + READ32(bmval1); + if (bmval1 != 0 || bmlen > 2) + goto out_bad_bitmap; + } + READ_BUF(4); + READ32(attrlen); + READ_BUF(attrlen); + fsinfo->rtmult = fsinfo->wtmult = 512; /* ??? */ + fsinfo->lease_time = 60; + len = attrlen; + + if (bmval0 & FATTR4_WORD0_LEASE_TIME) { + len -= 4; + READ32(fsinfo->lease_time); + dprintk("read_attrs: lease_time=%d\n", fsinfo->lease_time); + } + if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { + len -= 8; + READ64(fsinfo->maxfilesize); + dprintk("read_attrs: maxfilesize=0x%Lx\n", (long long) fsinfo->maxfilesize); + } + if (bmval0 & FATTR4_WORD0_MAXREAD) { + len -= 8; + READ64(fsinfo->rtmax); + fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax; + dprintk("read_attrs: maxread=%d\n", fsinfo->rtmax); + } + if (bmval0 & FATTR4_WORD0_MAXWRITE) { + len -= 8; + READ64(fsinfo->wtmax); + fsinfo->wtpref = fsinfo->wtmax; + dprintk("read_attrs: maxwrite=%d\n", fsinfo->wtmax); + } + if (len != 0) + goto out_bad_attrlen; + return 0; +out_bad_attrlen: + printk(KERN_NOTICE "%s: server attribute length %u does not match bitmap 0x%x/0x%x\n", + __FUNCTION__, (unsigned int)attrlen, + (unsigned int) bmval0, (unsigned int)bmval1); + return -EIO; +out_bad_bitmap: + printk(KERN_NOTICE "%s: server returned bad attribute bitmap 0x%x/0x%x\n", + __FUNCTION__, + (unsigned int)bmval0, (unsigned int)bmval1); + return -EIO; +} + static int decode_getfh(struct xdr_stream *xdr, struct nfs4_getfh *getfh) { @@ -2403,6 +2475,27 @@ out: return status; } +/* + * FSINFO request + */ +static int +nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, fsinfo); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + 
return status; +} + /* * Decode RENEW response */ @@ -2476,6 +2569,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), PROC(CLOSE, enc_close, dec_close), PROC(SETATTR, enc_setattr, dec_setattr), + PROC(FSINFO, enc_fsinfo, dec_fsinfo), PROC(RENEW, enc_renew, dec_renew), }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 1598d1b3c739..d535a46f7d4f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -221,6 +221,7 @@ enum { NFSPROC4_CLNT_OPEN_CONFIRM, NFSPROC4_CLNT_CLOSE, NFSPROC4_CLNT_SETATTR, + NFSPROC4_CLNT_FSINFO, NFSPROC4_CLNT_RENEW, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5bc59e4db5e3..09c2dd2216d0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -449,7 +449,6 @@ struct nfs4_getattr { u32 * gt_bmval; /* request */ struct nfs_fattr * gt_attrs; /* response */ struct nfs_fsstat * gt_fsstat; /* response */ - struct nfs_fsinfo * gt_fsinfo; /* response */ struct nfs_pathconf * gt_pathconf; /* response */ }; -- cgit v1.2.3 From 88e4b0f2c019ff2f4cee0045812bab0f8146f4ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:56:12 +0100 Subject: NFSv4: Convert SETCLIENTID and SETCLIENTID_CONFIRM to be standalone operations. Ensure that SETCLIENTID_CONFIRM always returns the lease timeout length. 
--- fs/nfs/nfs4proc.c | 127 ++++++++++++++++++++++---------------------- fs/nfs/nfs4xdr.c | 136 ++++++++++++++++++++++++++++++++++++++++++------ include/linux/nfs4.h | 2 + include/linux/nfs_fs.h | 2 + include/linux/nfs_xdr.h | 2 - 5 files changed, 186 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 264ef919baee..8e8ac9cc08f6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -56,9 +56,6 @@ extern struct rpc_procinfo nfs4_procedures[]; extern nfs4_stateid zero_stateid; -static int nfs4_proc_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); - - static void nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops, struct nfs_server *server, char *tag) @@ -397,41 +394,6 @@ nfs4_setup_savefh(struct nfs4_compound *cp) cp->req_nops++; } -static void -nfs4_setup_setclientid(struct nfs4_compound *cp, u32 program, unsigned short port) -{ - struct nfs4_setclientid *setclientid = GET_OP(cp, setclientid); - struct nfs_server *server = cp->server; - struct timespec tv; - u32 *p; - - tv = CURRENT_TIME; - p = (u32 *)setclientid->sc_verifier.data; - *p++ = tv.tv_sec; - *p++ = tv.tv_nsec; - setclientid->sc_name = server->ip_addr; - sprintf(setclientid->sc_netid, "udp"); - sprintf(setclientid->sc_uaddr, "%s.%d.%d", server->ip_addr, port >> 8, port & 255); - setclientid->sc_prog = program; - setclientid->sc_cb_ident = 0; - setclientid->sc_state = server->nfs4_state; - - OPNUM(cp) = OP_SETCLIENTID; - cp->req_nops++; -} - -static void -nfs4_setup_setclientid_confirm(struct nfs4_compound *cp) -{ - struct nfs4_client **client_state = GET_OP(cp, setclientid_confirm); - - *client_state = cp->server->nfs4_state; - - OPNUM(cp) = OP_SETCLIENTID_CONFIRM; - cp->req_nops++; - cp->renew_index = cp->req_nops; -} - static void renew_lease(struct nfs_server *server, unsigned long timestamp) { @@ -701,51 +663,31 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct 
nfs4_client *clp; struct nfs4_compound compound; struct nfs4_op ops[4]; - struct nfs_fsinfo fsinfo; unsigned char * p; struct qstr q; - unsigned long last_renewed; int status; clp = server->nfs4_state; down_write(&clp->cl_sem); /* Has the clientid already been initialized? */ - if (clp->cl_state != NFS4CLNT_NEW) { + if (clp->cl_state != NFS4CLNT_NEW) /* Yep, so just read the root attributes and the lease time. */ - fattr->valid = 0; - nfs4_setup_compound(&compound, ops, server, "getrootfh"); - nfs4_setup_putrootfh(&compound); - nfs4_setup_getattr(&compound, fattr); - nfs4_setup_getfh(&compound, fhandle); - if ((status = nfs4_call_compound(&compound, NULL, 0))) - goto out_unlock; goto no_setclientid; - } /* * SETCLIENTID. * Until delegations are imported, we don't bother setting the program * number and port to anything meaningful. */ - nfs4_setup_compound(&compound, ops, server, "setclientid"); - nfs4_setup_setclientid(&compound, 0, 0); - last_renewed = jiffies; - if ((status = nfs4_call_compound(&compound, NULL, 0))) + if ((status = nfs4_proc_setclientid(clp, 0, 0))) goto out_unlock; /* * SETCLIENTID_CONFIRM, plus root filehandle. * We also get the lease time here. */ - fattr->valid = 0; - nfs4_setup_compound(&compound, ops, server, "setclientid_confirm"); - nfs4_setup_setclientid_confirm(&compound); - nfs4_setup_putrootfh(&compound); - nfs4_setup_getattr(&compound, fattr); - nfs4_setup_getfh(&compound, fhandle); - last_renewed = jiffies; - if ((status = nfs4_call_compound(&compound, NULL, 0))) + if ((status = nfs4_proc_setclientid_confirm(clp))) goto out_unlock; /* @@ -754,10 +696,6 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, * server. * FIXME: we only need one renewd daemon per server. 
*/ - if ((status = nfs4_proc_fsinfo(server, fhandle, &fsinfo))) - goto out_unlock; - clp->cl_lease_time = fsinfo.lease_time * HZ; - clp->cl_last_renewal = last_renewed; nfs4_schedule_state_renewal(clp); clp->cl_state = NFS4CLNT_OK; @@ -770,6 +708,13 @@ no_setclientid: * catch an ERR_WRONGSEC if it occurs along the way... */ p = server->mnt_path; + fattr->valid = 0; + nfs4_setup_compound(&compound, ops, server, "getrootfh"); + nfs4_setup_putrootfh(&compound); + nfs4_setup_getattr(&compound, fattr); + nfs4_setup_getfh(&compound, fhandle); + if ((status = nfs4_call_compound(&compound, NULL, 0))) + goto out; for (;;) { while (*p == '/') p++; @@ -798,6 +743,7 @@ no_setclientid: return status; out_unlock: up_write(&clp->cl_sem); +out: return status; } @@ -1724,6 +1670,57 @@ nfs4_request_compatible(struct nfs_page *req, struct file *filp, struct page *pa return 1; } +int +nfs4_proc_setclientid(struct nfs4_client *clp, + u32 program, unsigned short port) +{ + u32 *p; + struct nfs4_setclientid setclientid; + struct timespec tv; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], + .rpc_argp = &setclientid, + .rpc_resp = clp, + .rpc_cred = clp->cl_cred, + }; + + tv = CURRENT_TIME; + p = (u32*)setclientid.sc_verifier.data; + *p++ = (u32)tv.tv_sec; + *p = (u32)tv.tv_nsec; + setclientid.sc_name = clp->cl_ipaddr; + sprintf(setclientid.sc_netid, "tcp"); + sprintf(setclientid.sc_uaddr, "%s.%d.%d", clp->cl_ipaddr, port >> 8, port & 255); + setclientid.sc_prog = htonl(program); + setclientid.sc_cb_ident = 0; + + return rpc_call_sync(clp->cl_rpcclient, &msg, 0); +} + +int +nfs4_proc_setclientid_confirm(struct nfs4_client *clp) +{ + struct nfs_fsinfo fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], + .rpc_argp = clp, + .rpc_resp = &fsinfo, + .rpc_cred = clp->cl_cred, + }; + unsigned long now; + int status; + + now = jiffies; + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status == 
0) { + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + } + return status; +} + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .getroot = nfs4_proc_get_root, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ed79661601dd..cb96666b3831 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -73,6 +73,8 @@ extern int nfs_stat_to_errno(int); #define encode_putfh_maxsz op_encode_hdr_maxsz + 1 + \ (NFS4_FHSIZE >> 2) #define decode_putfh_maxsz op_decode_hdr_maxsz +#define encode_putrootfh_maxsz op_encode_hdr_maxsz +#define decode_putrootfh_maxsz op_decode_hdr_maxsz #define encode_getfh_maxsz op_encode_hdr_maxsz #define decode_getfh_maxsz op_decode_hdr_maxsz + 1 + \ (NFS4_FHSIZE >> 2) @@ -94,6 +96,21 @@ extern int nfs_stat_to_errno(int); #define decode_fsinfo_maxsz op_decode_hdr_maxsz + 11 #define encode_renew_maxsz op_encode_hdr_maxsz + 3 #define decode_renew_maxsz op_decode_hdr_maxsz +#define encode_setclientid_maxsz \ + op_encode_hdr_maxsz + \ + 4 /*server->ip_addr*/ + \ + 1 /*Netid*/ + \ + 6 /*uaddr*/ + \ + 6 + (NFS4_VERIFIER_SIZE >> 2) +#define decode_setclientid_maxsz \ + op_decode_hdr_maxsz + \ + 2 + \ + 1024 /* large value for CLID_INUSE */ +#define encode_setclientid_confirm_maxsz \ + op_encode_hdr_maxsz + \ + 3 + (NFS4_VERIFIER_SIZE >> 2) +#define decode_setclientid_confirm_maxsz \ + op_decode_hdr_maxsz #define NFS4_enc_compound_sz 1024 /* XXX: large enough? */ #define NFS4_dec_compound_sz 1024 /* XXX: large enough? 
*/ @@ -173,6 +190,20 @@ extern int nfs_stat_to_errno(int); encode_renew_maxsz #define NFS4_dec_renew_sz compound_decode_hdr_maxsz + \ decode_renew_maxsz +#define NFS4_enc_setclientid_sz compound_encode_hdr_maxsz + \ + encode_setclientid_maxsz +#define NFS4_dec_setclientid_sz compound_decode_hdr_maxsz + \ + decode_setclientid_maxsz +#define NFS4_enc_setclientid_confirm_sz \ + compound_encode_hdr_maxsz + \ + encode_setclientid_confirm_maxsz + \ + encode_putrootfh_maxsz + \ + encode_fsinfo_maxsz +#define NFS4_dec_setclientid_confirm_sz \ + compound_decode_hdr_maxsz + \ + decode_setclientid_confirm_maxsz + \ + decode_putrootfh_maxsz + \ + decode_fsinfo_maxsz static struct { @@ -918,12 +949,6 @@ encode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqs case OP_SAVEFH: status = encode_savefh(xdr); break; - case OP_SETCLIENTID: - status = encode_setclientid(xdr, &cp->ops[i].u.setclientid); - break; - case OP_SETCLIENTID_CONFIRM: - status = encode_setclientid_confirm(xdr, cp->ops[i].u.setclientid_confirm); - break; default: BUG(); } @@ -1187,6 +1212,46 @@ nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) return encode_renew(&xdr, clp); } +/* + * a SETCLIENTID request + */ +static int +nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, + struct nfs4_setclientid *sc) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + return encode_setclientid(&xdr, sc); +} + +/* + * a SETCLIENTID_CONFIRM request + */ +static int +nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, + struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_setclientid_confirm(&xdr, clp); + if (!status) + status = encode_putrootfh(&xdr); + if (!status) + status = 
encode_fsinfo(&xdr); + return status; +} + /* * START OF "GENERIC" DECODE ROUTINES. * These may look a little ugly since they are imported from a "generic" @@ -2098,7 +2163,7 @@ decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res) } static int -decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid *setclientid) +decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) { uint32_t *p; uint32_t opnum; @@ -2114,9 +2179,9 @@ decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid *setclientid) } READ32(nfserr); if (nfserr == NFS_OK) { - READ_BUF(8 + sizeof(setclientid->sc_state->cl_confirm.data)); - READ64(setclientid->sc_state->cl_clientid); - COPYMEM(setclientid->sc_state->cl_confirm.data, sizeof(setclientid->sc_state->cl_confirm.data)); + READ_BUF(8 + sizeof(clp->cl_confirm.data)); + READ64(clp->cl_clientid); + COPYMEM(clp->cl_confirm.data, sizeof(clp->cl_confirm.data)); } else if (nfserr == NFSERR_CLID_INUSE) { uint32_t len; @@ -2231,12 +2296,6 @@ decode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqs case OP_SAVEFH: status = decode_savefh(xdr); break; - case OP_SETCLIENTID: - status = decode_setclientid(xdr, &op->u.setclientid); - break; - case OP_SETCLIENTID_CONFIRM: - status = decode_setclientid_confirm(xdr); - break; default: BUG(); return -EIO; @@ -2513,6 +2572,49 @@ nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) return status; } +/* + * Decode SETCLIENTID response + */ +static int +nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, + struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_setclientid(&xdr, clp); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + +/* + * Decode SETCLIENTID_CONFIRM response + */ +static int +nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, 
uint32_t *p, struct nfs_fsinfo *fsinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_setclientid_confirm(&xdr); + if (!status) + status = decode_putrootfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, fsinfo); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + uint32_t * nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { @@ -2571,6 +2673,8 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SETATTR, enc_setattr, dec_setattr), PROC(FSINFO, enc_fsinfo, dec_fsinfo), PROC(RENEW, enc_renew, dec_renew), + PROC(SETCLIENTID, enc_setclientid, dec_setclientid), + PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), }; struct rpc_version nfs_version4 = { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index d535a46f7d4f..a601ae91de21 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -223,6 +223,8 @@ enum { NFSPROC4_CLNT_SETATTR, NFSPROC4_CLNT_FSINFO, NFSPROC4_CLNT_RENEW, + NFSPROC4_CLNT_SETCLIENTID, + NFSPROC4_CLNT_SETCLIENTID_CONFIRM, }; #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4996221041a4..fcaf3322cadf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -552,6 +552,8 @@ struct nfs4_state { /* nfs4proc.c */ +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 09c2dd2216d0..244a430f83ef 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -555,8 +555,6 @@ struct nfs4_op { struct nfs4_rename rename; struct nfs4_client * renew; struct nfs4_setattr setattr; - struct nfs4_setclientid setclientid; - 
struct nfs4_client * setclientid_confirm; } u; }; -- cgit v1.2.3 From 2d5e9ebc702452d30ca0a9c50b9e3f0d8a56a4e9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:57:13 +0100 Subject: NFSv4: Don't translate those NFSv4 errors that are needed by the kernel itself into EIO. Fix a signed/unsigned bug in nfs4_increment_seqid. --- fs/nfs/nfs4state.c | 4 +-- fs/nfs/nfs4xdr.c | 63 ++++++++++++++++++++++++++++++++++++++++++++- include/linux/nfs4.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs.h | 2 +- 4 files changed, 135 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index bded88b8d3eb..3bdb3a016e25 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -376,9 +376,9 @@ nfs4_put_open_state(struct nfs4_state *state) * see comments nfs_fs.h:seqid_mutating_error() */ void -nfs4_increment_seqid(u32 status, struct nfs4_state_owner *sp) +nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) { - if (status == NFS_OK || seqid_mutating_err(status)) + if (status == NFS_OK || seqid_mutating_err(-status)) sp->so_seqid++; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cb96666b3831..67a8e7cf403b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -57,7 +57,7 @@ /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO -extern int nfs_stat_to_errno(int); +static int nfs_stat_to_errno(int); /* NFSv4 COMPOUND tags are only wanted for debugging purposes */ #ifdef DEBUG @@ -2650,6 +2650,67 @@ nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) return p; } +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. 
+ */ +static struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS4_OK, 0 }, + { NFS4ERR_PERM, EPERM }, + { NFS4ERR_NOENT, ENOENT }, + { NFS4ERR_IO, errno_NFSERR_IO }, + { NFS4ERR_NXIO, ENXIO }, + { NFS4ERR_ACCESS, EACCES }, + { NFS4ERR_EXIST, EEXIST }, + { NFS4ERR_XDEV, EXDEV }, + { NFS4ERR_NOTDIR, ENOTDIR }, + { NFS4ERR_ISDIR, EISDIR }, + { NFS4ERR_INVAL, EINVAL }, + { NFS4ERR_FBIG, EFBIG }, + { NFS4ERR_NOSPC, ENOSPC }, + { NFS4ERR_ROFS, EROFS }, + { NFS4ERR_MLINK, EMLINK }, + { NFS4ERR_NAMETOOLONG, ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, ENOTEMPTY }, + { NFS4ERR_DQUOT, EDQUOT }, + { NFS4ERR_STALE, ESTALE }, + { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, + { NFS4ERR_NOTSUPP, ENOTSUPP }, + { NFS4ERR_TOOSMALL, ETOOSMALL }, + { NFS4ERR_SERVERFAULT, ESERVERFAULT }, + { NFS4ERR_BADTYPE, EBADTYPE }, + { NFS4ERR_LOCKED, EAGAIN }, + { NFS4ERR_RESOURCE, EREMOTEIO }, + { NFS4ERR_SYMLINK, ELOOP }, + { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, EDEADLK }, + { -1, EIO } +}; + +/* + * Convert an NFSv4 status code to a local errno value. + * Codes with no entry in nfs_errtbl are returned unchanged. + */ +static int +nfs_stat_to_errno(int stat) +{ + int i; + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == stat) + return nfs_errtbl[i].errno; + } + /* If we cannot translate the error, the recovery routines should + * handle it. + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. + */ + return stat; +} + #ifndef MAX # define MAX(a, b) (((a) > (b))? 
(a) : (b)) #endif diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index a601ae91de21..f56601a5825d 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -88,6 +88,76 @@ enum nfs_opnum4 { OP_WRITE = 38, }; +enum nfsstat4 { + NFS4_OK = 0, + NFS4ERR_PERM = 1, + NFS4ERR_NOENT = 2, + NFS4ERR_IO = 5, + NFS4ERR_NXIO = 6, + NFS4ERR_ACCESS = 13, + NFS4ERR_EXIST = 17, + NFS4ERR_XDEV = 18, + /* Unused/reserved 19 */ + NFS4ERR_NOTDIR = 20, + NFS4ERR_ISDIR = 21, + NFS4ERR_INVAL = 22, + NFS4ERR_FBIG = 27, + NFS4ERR_NOSPC = 28, + NFS4ERR_ROFS = 30, + NFS4ERR_MLINK = 31, + NFS4ERR_NAMETOOLONG = 63, + NFS4ERR_NOTEMPTY = 66, + NFS4ERR_DQUOT = 69, + NFS4ERR_STALE = 70, + NFS4ERR_BADHANDLE = 10001, + NFS4ERR_BAD_COOKIE = 10003, + NFS4ERR_NOTSUPP = 10004, + NFS4ERR_TOOSMALL = 10005, + NFS4ERR_SERVERFAULT = 10006, + NFS4ERR_BADTYPE = 10007, + NFS4ERR_DELAY = 10008, + NFS4ERR_SAME = 10009, + NFS4ERR_DENIED = 10010, + NFS4ERR_EXPIRED = 10011, + NFS4ERR_LOCKED = 10012, + NFS4ERR_GRACE = 10013, + NFS4ERR_FHEXPIRED = 10014, + NFS4ERR_SHARE_DENIED = 10015, + NFS4ERR_WRONGSEC = 10016, + NFS4ERR_CLID_INUSE = 10017, + NFS4ERR_RESOURCE = 10018, + NFS4ERR_MOVED = 10019, + NFS4ERR_NOFILEHANDLE = 10020, + NFS4ERR_MINOR_VERS_MISMATCH = 10021, + NFS4ERR_STALE_CLIENTID = 10022, + NFS4ERR_STALE_STATEID = 10023, + NFS4ERR_OLD_STATEID = 10024, + NFS4ERR_BAD_STATEID = 10025, + NFS4ERR_BAD_SEQID = 10026, + NFS4ERR_NOT_SAME = 10027, + NFS4ERR_LOCK_RANGE = 10028, + NFS4ERR_SYMLINK = 10029, + NFS4ERR_RESTOREFH = 10030, + NFS4ERR_LEASE_MOVED = 10031, + NFS4ERR_ATTRNOTSUPP = 10032, + NFS4ERR_NO_GRACE = 10033, + NFS4ERR_RECLAIM_BAD = 10034, + NFS4ERR_RECLAIM_CONFLICT = 10035, + NFS4ERR_BADXDR = 10036, + NFS4ERR_LOCKS_HELD = 10037, + NFS4ERR_OPENMODE = 10038, + NFS4ERR_BADOWNER = 10039, + NFS4ERR_BADCHAR = 10040, + NFS4ERR_BADNAME = 10041, + NFS4ERR_BAD_RANGE = 10042, + NFS4ERR_LOCK_NOTSUPP = 10043, + NFS4ERR_OP_ILLEGAL = 10044, + NFS4ERR_DEADLOCK = 10045, + NFS4ERR_FILE_OPEN = 10046, + 
NFS4ERR_ADMIN_REVOKED = 10047, + NFS4ERR_CB_PATH_DOWN = 10048 +}; + /* * Note: NF4BAD is not actually part of the protocol; it is just used * internally by nfsd. diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fcaf3322cadf..a005c999ce43 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -571,7 +571,7 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); -extern void nfs4_increment_seqid(u32 status, struct nfs4_state_owner *sp); +extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); struct nfs4_mount_data; #else -- cgit v1.2.3 From e85c40cde9d47156479a28710becc39becf6fe24 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:57:57 +0100 Subject: NFSv4: Preparation for the server reboot recovery code. --- fs/nfs/nfs4proc.c | 55 ++++++++++++++++++++++++++-- fs/nfs/nfs4state.c | 89 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4xdr.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs4.h | 1 + include/linux/nfs_fs.h | 2 + include/linux/nfs_xdr.h | 13 +++++++ 6 files changed, 253 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8e8ac9cc08f6..5e52e7d218f8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -458,6 +458,54 @@ process_cinfo(struct nfs4_change_info *info, struct nfs_fattr *fattr) } } +/* + * OPEN_RECLAIM: + * reclaim state on the server after a reboot. 
+ * Assumes caller is holding the sp->so_sem + */ +int +nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr = { + .valid = 0, + }; + struct nfs4_change_info d_cinfo; + struct nfs4_getattr f_getattr = { + .gt_bmval = nfs4_fattr_bitmap, + .gt_attrs = &fattr, + }; + + struct nfs_open_reclaimargs o_arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, + .id = sp->so_id, + .share_access = state->state, + .clientid = server->nfs4_state->cl_clientid, + .claim = NFS4_OPEN_CLAIM_PREVIOUS, + .f_getattr = &f_getattr, + }; + struct nfs_openres o_res = { + .cinfo = &d_cinfo, + .f_getattr = &f_getattr, + .server = server, /* Grrr */ + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_RECLAIM], + .rpc_argp = &o_arg, + .rpc_resp = &o_res, + .rpc_cred = sp->so_cred, + }; + int status; + + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_seqid(status, sp); + /* Update the inode attributes */ + nfs_refresh_inode(inode, &fattr); + return status; +} + struct nfs4_state * nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) { @@ -523,10 +571,9 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt o_arg.id = sp->so_id; status = rpc_call_sync(server->client, &msg, 0); - if (status) { - goto out_up; - } nfs4_increment_seqid(status, sp); + if (status) + goto out_up; process_cinfo(&d_cinfo, &d_attr); nfs_refresh_inode(dir, &d_attr); @@ -555,9 +602,9 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt memcpy(&oc_arg.stateid, &o_res.stateid, sizeof(oc_arg.stateid)); status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_seqid(status, sp); if (status) goto out_up; - nfs4_increment_seqid(status, sp); memcpy(&state->stateid, &oc_res.stateid, sizeof(state->stateid)); } else 
memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3bdb3a016e25..8738414bd8ea 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -382,6 +382,95 @@ nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) sp->so_seqid++; } +static int reclaimer(void *); +struct reclaimer_args { + struct nfs4_client *clp; + struct completion complete; +}; + +/* + * State recovery routine + */ +void +nfs4_recover_state(struct nfs4_client *clp) +{ + struct reclaimer_args args = { + .clp = clp, + }; + init_completion(&args.complete); + + down_read(&clp->cl_sem); + if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0) + goto out_failed; + wait_for_completion(&args.complete); + return; +out_failed: + up_read(&clp->cl_sem); +} + +static void +nfs4_reclaim_open_state(struct nfs4_state_owner *sp) +{ + struct nfs4_state *state; + int status; + + list_for_each_entry(state, &sp->so_states, open_states) { + status = nfs4_open_reclaim(sp, state); + if (status) { + /* + * Open state on this file cannot be recovered + * All we can do is revert to using the zero stateid. + */ + memset(state->stateid.data, 0, + sizeof(state->stateid.data)); + /* Mark the file as being 'closed' */ + state->state = 0; + } + } +} + +static int +reclaimer(void *ptr) +{ + struct reclaimer_args *args = (struct reclaimer_args *)ptr; + struct nfs4_client *clp = args->clp; + struct nfs4_state_owner *sp; + int status; + + daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr)); + allow_signal(SIGKILL); + + complete(&args->complete); + + /* Are there any NFS mounts out there? 
*/ + if (list_empty(&clp->cl_superblocks)) + goto out; + status = nfs4_proc_setclientid(clp, 0, 0); + if (status) + goto out_error; + status = nfs4_proc_setclientid_confirm(clp); + if (status) + goto out_error; + spin_lock(&clp->cl_lock); + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + atomic_inc(&sp->so_count); + spin_unlock(&clp->cl_lock); + down(&sp->so_sema); + nfs4_reclaim_open_state(sp); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + spin_lock(&clp->cl_lock); + } + spin_unlock(&clp->cl_lock); +out: + up_read(&clp->cl_sem); + return 0; +out_error: + printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u\n", + NIPQUAD(clp->cl_addr.s_addr)); + goto out; +} + /* * Local variables: * c-basic-offset: 8 diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 67a8e7cf403b..16296618a231 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -166,6 +166,16 @@ static int nfs_stat_to_errno(int); #define NFS4_dec_open_confirm_sz compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + 4 +#define NFS4_enc_open_reclaim_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + \ + 11 + \ + encode_getattr_maxsz +#define NFS4_dec_open_reclaim_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + 4 + 5 + 2 + 3 + \ + decode_getattr_maxsz #define NFS4_enc_close_sz compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 5 @@ -666,6 +676,41 @@ encode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmargs *arg) } +static int +encode_open_reclaim(struct xdr_stream *xdr, struct nfs_open_reclaimargs *arg) +{ + uint32_t *p; + + /* + * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, + * owner 4, opentype 4, claim 4, delegation_type 4 = 44 + */ + RESERVE_SPACE(44); + WRITE32(OP_OPEN); + WRITE32(arg->seqid); + switch (arg->share_access) { + case FMODE_READ: + WRITE32(NFS4_SHARE_ACCESS_READ); + break; + case FMODE_WRITE: + 
WRITE32(NFS4_SHARE_ACCESS_WRITE); + break; + case FMODE_READ|FMODE_WRITE: + WRITE32(NFS4_SHARE_ACCESS_BOTH); + break; + default: + BUG(); + } + WRITE32(0); /* for linux, share_deny = 0 always */ + WRITE64(arg->clientid); + WRITE32(4); + WRITE32(arg->id); + WRITE32(NFS4_OPEN_NOCREATE); + WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); + WRITE32(NFS4_OPEN_DELEGATE_NONE); + return 0; +} + static int encode_putfh(struct xdr_stream *xdr, struct nfs_fh *fh) { @@ -1058,6 +1103,32 @@ out: return status; } +/* + * Encode an OPEN_RECLAIM request + */ +static int +nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p, + struct nfs_open_reclaimargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open_reclaim(&xdr, args); + if (status) + goto out; + status = encode_getattr(&xdr, args->f_getattr); +out: + return status; +} + /* * Encode a READ request @@ -2417,6 +2488,31 @@ out: return status; } +/* + * Decode OPEN_RECLAIM response + */ +static int +nfs4_xdr_dec_open_reclaim(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open(&xdr, res); + if (status) + goto out; + status = decode_getattr(&xdr, res->f_getattr, res->server); +out: + return status; +} + /* * Decode SETATTR response */ @@ -2730,6 +2826,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(COMMIT, enc_commit, dec_commit), PROC(OPEN, enc_open, dec_open), PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), + PROC(OPEN_RECLAIM, enc_open_reclaim, dec_open_reclaim), PROC(CLOSE, enc_close, dec_close), PROC(SETATTR, enc_setattr, dec_setattr), 
PROC(FSINFO, enc_fsinfo, dec_fsinfo), diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index f56601a5825d..a6f2d563b605 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -289,6 +289,7 @@ enum { NFSPROC4_CLNT_COMMIT, NFSPROC4_CLNT_OPEN, NFSPROC4_CLNT_OPEN_CONFIRM, + NFSPROC4_CLNT_OPEN_RECLAIM, NFSPROC4_CLNT_CLOSE, NFSPROC4_CLNT_SETATTR, NFSPROC4_CLNT_FSINFO, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a005c999ce43..b43412088372 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -554,6 +554,7 @@ struct nfs4_state { /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); +extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *); extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); @@ -572,6 +573,7 @@ extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern void nfs4_recover_state(struct nfs4_client *); struct nfs4_mount_data; #else diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 244a430f83ef..242b50436d92 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -133,6 +133,19 @@ struct nfs_open_confirmres { nfs4_stateid stateid; }; +/* + * Arguments to the open_reclaim call. + */ +struct nfs_open_reclaimargs { + struct nfs_fh * fh; + __u64 clientid; + __u32 seqid; + __u32 id; + __u32 share_access; + __u32 claim; + struct nfs4_getattr * f_getattr; +}; + /* * Arguments to the close call. 
*/ -- cgit v1.2.3 From dce9f3bf3f0af4a22eb06822a74e2de84f98cd57 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:58:38 +0100 Subject: NFSv4: Basic code for recovering file OPEN state after a server reboot. --- fs/nfs/inode.c | 2 + fs/nfs/nfs4proc.c | 235 ++++++++++++++++++++++++++++++++++++++++--------- fs/nfs/nfs4state.c | 129 +++++++++++++++++++++++---- include/linux/nfs_fs.h | 14 ++- 4 files changed, 317 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2c1df6550feb..767cd32e7a36 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1448,6 +1448,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); } + if (list_empty(&clp->cl_superblocks)) + clear_bit(NFS4CLNT_OK, &clp->cl_state); list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); server->nfs4_state = clp; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5e52e7d218f8..841934fafaee 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -48,9 +48,12 @@ #define NFSDBG_FACILITY NFSDBG_PROC +#define NFS4_POLL_RETRY_TIME (15*HZ) + #define GET_OP(cp,name) &cp->ops[cp->req_nops].u.name #define OPNUM(cp) cp->ops[cp->req_nops].opnum +static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; @@ -532,7 +535,6 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .share_access = flags & (FMODE_READ|FMODE_WRITE), - .clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, .opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE, .createmode = (flags & O_EXCL) ? 
NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED, .name = name, @@ -553,6 +555,7 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt .rpc_cred = cred, }; +retry: status = -ENOMEM; if (!(sp = nfs4_get_state_owner(NFS_SERVER(dir), cred))) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); @@ -569,6 +572,7 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt down(&sp->so_sema); o_arg.seqid = sp->so_seqid; o_arg.id = sp->so_id; + o_arg.clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, status = rpc_call_sync(server->client, &msg, 0); nfs4_increment_seqid(status, sp); @@ -623,6 +627,9 @@ out_up: nfs4_put_open_state(state); if (inode) iput(inode); + status = nfs4_handle_error(server, status); + if (!status) + goto retry; out: return ERR_PTR(status); } @@ -651,7 +658,9 @@ nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, .rpc_argp = &arg, .rpc_resp = &res, }; + int status; +retry: fattr->valid = 0; if (state) @@ -659,7 +668,13 @@ nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); - return(rpc_call_sync(server->client, &msg, 0)); + status = rpc_call_sync(server->client, &msg, 0); + if (status) { + status = nfs4_handle_error(server, status); + if (!status) + goto retry; + } + return status; } /* @@ -707,48 +722,12 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs4_client *clp; struct nfs4_compound compound; struct nfs4_op ops[4]; unsigned char * p; struct qstr q; int status; - clp = server->nfs4_state; - - down_write(&clp->cl_sem); - /* Has the clientid already been initialized? */ - if (clp->cl_state != NFS4CLNT_NEW) - /* Yep, so just read the root attributes and the lease time. */ - goto no_setclientid; - - /* - * SETCLIENTID. 
- * Until delegations are imported, we don't bother setting the program - * number and port to anything meaningful. - */ - if ((status = nfs4_proc_setclientid(clp, 0, 0))) - goto out_unlock; - - /* - * SETCLIENTID_CONFIRM, plus root filehandle. - * We also get the lease time here. - */ - if ((status = nfs4_proc_setclientid_confirm(clp))) - goto out_unlock; - - /* - * Now that we have instantiated the clientid and determined - * the lease time, we can initialize the renew daemon for this - * server. - * FIXME: we only need one renewd daemon per server. - */ - nfs4_schedule_state_renewal(clp); - clp->cl_state = NFS4CLNT_OK; - -no_setclientid: - up_write(&clp->cl_sem); - /* * Now we do a separate LOOKUP for each component of the mount path. * The LOOKUPs are done separately so that we can conveniently @@ -787,9 +766,6 @@ no_setclientid: } break; } - return status; -out_unlock: - up_write(&clp->cl_sem); out: return status; } @@ -1410,6 +1386,20 @@ nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return nfs4_call_compound(&compound, NULL, 0); } +static void +nfs4_restart_read(struct rpc_task *task) +{ + struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; + struct nfs_page *req; + + rpc_restart_call(task); + req = nfs_list_entry(data->pages.next); + if (req->wb_state) + memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + else + memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); +} + static void nfs4_read_done(struct rpc_task *task) { @@ -1417,6 +1407,10 @@ nfs4_read_done(struct rpc_task *task) struct inode *inode = data->inode; struct nfs_fattr *fattr = data->res.fattr; + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_read; + return; + } if (task->tk_status > 0) renew_lease(NFS_SERVER(inode), data->timestamp); /* Check cache consistency */ @@ -1484,12 +1478,30 @@ nfs4_write_refresh_inode(struct inode *inode, struct nfs_fattr 
*fattr) inode->i_mtime = fattr->mtime; } +static void +nfs4_restart_write(struct rpc_task *task) +{ + struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; + struct nfs_page *req; + + rpc_restart_call(task); + req = nfs_list_entry(data->pages.next); + if (req->wb_state) + memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + else + memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); +} + static void nfs4_write_done(struct rpc_task *task) { struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; struct inode *inode = data->inode; + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_write; + return; + } if (task->tk_status >= 0) renew_lease(NFS_SERVER(inode), data->timestamp); nfs4_write_refresh_inode(inode, data->res.fattr); @@ -1552,8 +1564,13 @@ static void nfs4_commit_done(struct rpc_task *task) { struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct inode *inode = data->inode; - nfs4_write_refresh_inode(data->inode, data->res.fattr); + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_write; + return; + } + nfs4_write_refresh_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ nfs_commit_done(task); } @@ -1599,6 +1616,14 @@ renew_done(struct rpc_task *task) { struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; unsigned long timestamp = (unsigned long)task->tk_calldata; + + if (task->tk_status < 0) { + switch (task->tk_status) { + case -NFS4ERR_STALE_CLIENTID: + nfs4_schedule_state_recovery(clp); + return; + } + } spin_lock(&clp->cl_lock); if (time_before(clp->cl_last_renewal,timestamp)) clp->cl_last_renewal = timestamp; @@ -1617,6 +1642,25 @@ nfs4_proc_async_renew(struct nfs4_client *clp) return rpc_call_async(clp->cl_rpcclient, &msg, 0, renew_done, (void *)jiffies); } +int +nfs4_proc_renew(struct 
nfs4_client *clp) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = clp->cl_cred, + }; + unsigned long now = jiffies; + int status; + + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,now)) + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + return status; +} + /* * We will need to arrange for the VFS layer to provide an atomic open. * Until then, this open method is prone to inefficiency and race conditions @@ -1697,6 +1741,113 @@ nfs4_request_init(struct nfs_page *req, struct file *filp) req->wb_cred = get_rpccred(state->owner->so_cred); } +static int +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) +{ + struct nfs4_client *clp = server->nfs4_state; + + if (!clp) + return 0; + switch(task->tk_status) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); + nfs4_schedule_state_recovery(clp); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + rpc_delay(task, NFS4_POLL_RETRY_TIME); + task->tk_status = 0; + return -EAGAIN; + } + return 0; +} + +int +nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +{ + DEFINE_WAIT(wait); + sigset_t oldset; + int interruptible, res; + + might_sleep(); + + rpc_clnt_sigmask(clnt, &oldset); + interruptible = TASK_UNINTERRUPTIBLE; + if (clnt->cl_intr) + interruptible = TASK_INTERRUPTIBLE; + do { + res = 0; + prepare_to_wait(&clp->cl_waitq, &wait, interruptible); + nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_OK, &clp->cl_state) && + !test_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) + break; + if (clnt->cl_intr && signalled()) { + res = -ERESTARTSYS; + break; + } + schedule(); + } while(!test_bit(NFS4CLNT_OK, &clp->cl_state)); + finish_wait(&clp->cl_waitq, &wait); + rpc_clnt_sigunmask(clnt, &oldset); + return 
res; +} + +static int +nfs4_delay(struct rpc_clnt *clnt) +{ + sigset_t oldset; + int res = 0; + + might_sleep(); + + rpc_clnt_sigmask(clnt, &oldset); + if (clnt->cl_intr) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(NFS4_POLL_RETRY_TIME); + if (signalled()) + res = -ERESTARTSYS; + } else { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(NFS4_POLL_RETRY_TIME); + } + rpc_clnt_sigunmask(clnt, &oldset); + return res; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +int +nfs4_handle_error(struct nfs_server *server, int errorcode) +{ + struct nfs4_client *clp = server->nfs4_state; + int ret = errorcode; + + switch(errorcode) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + ret = nfs4_wait_clnt_recover(server->client, clp); + break; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + ret = nfs4_delay(server->client); + break; + default: + if (errorcode <= -1000) { + printk(KERN_WARNING "%s could not handle NFSv4 error %d\n", + __FUNCTION__, -errorcode); + ret = -EIO; + } + } + /* We failed to handle the error */ + return ret; +} + static int nfs4_request_compatible(struct nfs_page *req, struct file *filp, struct page *page) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8738414bd8ea..36f3f7e4c0ef 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -56,6 +56,7 @@ nfs4_stateid one_stateid = static LIST_HEAD(nfs4_clientid_list); +static void nfs4_recover_state(void *); extern void nfs4_renew_state(void *); void @@ -98,9 +99,12 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); atomic_set(&clp->cl_count, 1); + INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); INIT_LIST_HEAD(&clp->cl_superblocks); - clp->cl_state = NFS4CLNT_NEW; + init_waitqueue_head(&clp->cl_waitq); + INIT_RPC_WAITQ(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_state 
= 1 << NFS4CLNT_NEW; } return clp; } @@ -155,6 +159,9 @@ nfs4_put_client(struct nfs4_client *clp) return; list_del(&clp->cl_servers); spin_unlock(&state_spinlock); + BUG_ON(!list_empty(&clp->cl_superblocks)); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); nfs4_kill_renewd(clp); nfs4_free_client(clp); } @@ -175,6 +182,7 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) atomic_inc(&sp->so_count); sp->so_cred = cred; list_move(&sp->so_list, &clp->cl_state_owners); + sp->so_generation = clp->cl_generation; clp->cl_nunused--; } return sp; @@ -215,13 +223,17 @@ nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) new->so_client = clp; new->so_id = nfs4_alloc_lockowner_id(clp); new->so_cred = cred; + new->so_generation = clp->cl_generation; sp = new; new = NULL; } spin_unlock(&clp->cl_lock); if (new) kfree(new); - if (!sp) + if (sp) { + if (!test_bit(NFS4CLNT_OK, &clp->cl_state)) + nfs4_wait_clnt_recover(server->client, clp); + } else put_rpccred(cred); return sp; } @@ -353,6 +365,7 @@ nfs4_put_open_state(struct nfs4_state *state) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; + int status = 0; if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) return; @@ -360,8 +373,16 @@ nfs4_put_open_state(struct nfs4_state *state) spin_unlock(&inode->i_lock); down(&owner->so_sema); list_del(&state->open_states); - if (state->state != 0) - nfs4_do_close(inode, state); + if (state->state != 0) { + do { + status = nfs4_do_close(inode, state); + if (!status) + break; + up(&owner->so_sema); + status = nfs4_handle_error(NFS_SERVER(inode), status); + down(&owner->so_sema); + } while (!status); + } up(&owner->so_sema); iput(inode); nfs4_free_open_state(state); @@ -392,41 +413,81 @@ struct reclaimer_args { * State recovery routine */ void -nfs4_recover_state(struct nfs4_client *clp) +nfs4_recover_state(void *data) { + struct nfs4_client *clp = (struct nfs4_client *)data; struct 
reclaimer_args args = { .clp = clp, }; + might_sleep(); + init_completion(&args.complete); down_read(&clp->cl_sem); - if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0) + if (test_and_set_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) goto out_failed; + if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0) + goto out_failed_clear; wait_for_completion(&args.complete); return; +out_failed_clear: + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state); + smp_mb__after_clear_bit(); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); out_failed: up_read(&clp->cl_sem); } -static void +/* + * Schedule a state recovery attempt + */ +void +nfs4_schedule_state_recovery(struct nfs4_client *clp) +{ + if (!clp) + return; + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_OK, &clp->cl_state); + smp_mb__after_clear_bit(); + schedule_work(&clp->cl_recoverd); +} + +static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp) { struct nfs4_state *state; - int status; + int status = 0; list_for_each_entry(state, &sp->so_states, open_states) { status = nfs4_open_reclaim(sp, state); - if (status) { - /* - * Open state on this file cannot be recovered - * All we can do is revert to using the zero stateid. - */ - memset(state->stateid.data, 0, + if (status >= 0) + continue; + switch (status) { + default: + printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", + __FUNCTION__, status); + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + /* + * Open state on this file cannot be recovered + * All we can do is revert to using the zero stateid. 
+ */ + memset(state->stateid.data, 0, sizeof(state->stateid.data)); - /* Mark the file as being 'closed' */ - state->state = 0; + /* Mark the file as being 'closed' */ + state->state = 0; + break; + case -NFS4ERR_STALE_CLIENTID: + goto out_err; } } + return 0; +out_err: + return status; } static int @@ -435,6 +496,7 @@ reclaimer(void *ptr) struct reclaimer_args *args = (struct reclaimer_args *)ptr; struct nfs4_client *clp = args->clp; struct nfs4_state_owner *sp; + int generation; int status; daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr)); @@ -445,29 +507,58 @@ reclaimer(void *ptr) /* Are there any NFS mounts out there? */ if (list_empty(&clp->cl_superblocks)) goto out; + if (!test_bit(NFS4CLNT_NEW, &clp->cl_state)) { + status = nfs4_proc_renew(clp); + if (status == 0) { + set_bit(NFS4CLNT_OK, &clp->cl_state); + goto out; + } + } status = nfs4_proc_setclientid(clp, 0, 0); if (status) goto out_error; status = nfs4_proc_setclientid_confirm(clp); if (status) goto out_error; + generation = ++(clp->cl_generation); + clear_bit(NFS4CLNT_NEW, &clp->cl_state); + set_bit(NFS4CLNT_OK, &clp->cl_state); + up_read(&clp->cl_sem); + nfs4_schedule_state_renewal(clp); +restart_loop: spin_lock(&clp->cl_lock); list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + if (sp->so_generation - generation <= 0) + continue; atomic_inc(&sp->so_count); spin_unlock(&clp->cl_lock); down(&sp->so_sema); - nfs4_reclaim_open_state(sp); + if (sp->so_generation - generation < 0) { + smp_rmb(); + sp->so_generation = clp->cl_generation; + status = nfs4_reclaim_open_state(sp); + } up(&sp->so_sema); nfs4_put_state_owner(sp); - spin_lock(&clp->cl_lock); + if (status < 0) { + if (status == -NFS4ERR_STALE_CLIENTID) + nfs4_schedule_state_recovery(clp); + goto out; + } + goto restart_loop; } spin_unlock(&clp->cl_lock); out: - up_read(&clp->cl_sem); + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state); + smp_mb__after_clear_bit(); + wake_up_all(&clp->cl_waitq); + 
rpc_wake_up(&clp->cl_rpcwaitq); return 0; out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u\n", NIPQUAD(clp->cl_addr.s_addr)); + up_read(&clp->cl_sem); goto out; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b43412088372..3efc91b200e6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -465,6 +465,7 @@ extern void * nfs_root_data(void); enum nfs4_client_state { NFS4CLNT_OK = 0, NFS4CLNT_NEW, + NFS4CLNT_SETUP_STATE, }; /* @@ -475,7 +476,8 @@ struct nfs4_client { struct in_addr cl_addr; /* Server identifier */ u64 cl_clientid; /* constant */ nfs4_verifier cl_confirm; - enum nfs4_client_state cl_state; + unsigned long cl_state; + long cl_generation; u32 cl_lockowner_id; @@ -499,6 +501,10 @@ struct nfs4_client { unsigned long cl_lease_time; unsigned long cl_last_renewal; struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + wait_queue_head_t cl_waitq; + struct rpc_wait_queue cl_rpcwaitq; /* Our own IP address, as a null-terminated string. * This is used to generate the clientid, and the callback address. 
@@ -523,6 +529,7 @@ struct nfs4_state_owner { u32 so_seqid; /* protected by so_sema */ unsigned int so_flags; /* protected by so_sema */ atomic_t so_count; + long so_generation; struct rpc_cred *so_cred; /* Associated cred */ struct list_head so_states; @@ -556,7 +563,9 @@ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *); extern int nfs4_proc_async_renew(struct nfs4_client *); +extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); +extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs4_client *); @@ -573,7 +582,8 @@ extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); -extern void nfs4_recover_state(struct nfs4_client *); +extern int nfs4_handle_error(struct nfs_server *, int); +extern void nfs4_schedule_state_recovery(struct nfs4_client *); struct nfs4_mount_data; #else -- cgit v1.2.3 From 2642498f5cd2b90e55736e8cfd312c05f63301e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 16:59:20 +0100 Subject: RPC/NFSv4: Allow lease RENEW calls to be soft (i.e. to time out) despite the mount being hard. 
--- fs/nfs/nfs4proc.c | 3 ++- include/linux/sunrpc/sched.h | 2 ++ net/sunrpc/clnt.c | 4 ++-- net/sunrpc/sched.c | 5 ++++- net/sunrpc/xprt.c | 6 +++--- 5 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 841934fafaee..f617d0bf5313 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1639,7 +1639,8 @@ nfs4_proc_async_renew(struct nfs4_client *clp) .rpc_cred = clp->cl_cred, }; - return rpc_call_async(clp->cl_rpcclient, &msg, 0, renew_done, (void *)jiffies); + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + renew_done, (void *)jiffies); } int diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 1113d7f3df13..6b8e3eb91513 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -108,6 +108,7 @@ typedef void (*rpc_action)(struct rpc_task *); #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_KILLED 0x0100 /* task was killed */ +#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SETUID(t) ((t)->tk_flags & RPC_TASK_SETUID) @@ -117,6 +118,7 @@ typedef void (*rpc_action)(struct rpc_task *); #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_IS_ACTIVATED(t) ((t)->tk_active) #define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) +#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_TASK_SLEEPING 0 #define RPC_TASK_RUNNING 1 diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6c6a8310000a..b41434a36009 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -798,7 +798,7 @@ call_timeout(struct rpc_task *task) to->to_retries = clnt->cl_timeout.to_retries; dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); - if (clnt->cl_softrtry) { + if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not 
responding, timed out\n", clnt->cl_protname, clnt->cl_server); @@ -841,7 +841,7 @@ call_decode(struct rpc_task *task) } if (task->tk_status < 12) { - if (!clnt->cl_softrtry) { + if (!RPC_IS_SOFT(task)) { task->tk_action = call_bind; clnt->cl_stats->rpcretrans++; goto out_retry; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 79ebf10e03d9..ad56eb17e80f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -731,8 +731,11 @@ rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, list_add(&task->tk_task, &all_tasks); spin_unlock(&rpc_sched_lock); - if (clnt) + if (clnt) { atomic_inc(&clnt->cl_users); + if (clnt->cl_softrtry) + task->tk_flags |= RPC_TASK_SOFT; + } #ifdef RPC_DEBUG task->tk_magic = 0xf00baa; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e6c5f7ab7968..aa93b824433d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -488,7 +488,7 @@ xprt_connect(struct rpc_task *task) case -ECONNREFUSED: case -ECONNRESET: case -ENOTCONN: - if (!task->tk_client->cl_softrtry) { + if (!RPC_IS_SOFT(task)) { rpc_delay(task, RPC_REESTABLISH_TIMEOUT); task->tk_status = -ENOTCONN; break; @@ -496,7 +496,7 @@ xprt_connect(struct rpc_task *task) default: /* Report myriad other possible returns. If this file * system is soft mounted, just error out, like Solaris. */ - if (task->tk_client->cl_softrtry) { + if (RPC_IS_SOFT(task)) { printk(KERN_WARNING "RPC: error %d connecting to server %s, exiting\n", -status, task->tk_client->cl_server); @@ -530,7 +530,7 @@ xprt_connect_status(struct rpc_task *task) } /* if soft mounted, just cause this RPC to fail */ - if (task->tk_client->cl_softrtry) + if (RPC_IS_SOFT(task)) task->tk_status = -EIO; switch (task->tk_status) { -- cgit v1.2.3 From ab91d13dcba1d0b69394fa1b0a3b034db71316ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 17:00:03 +0100 Subject: RPC: Ensure that we disconnect TCP sockets if there has been no NFS traffic for the last 5 minutes. This code also affects NFSv2/v3. 
--- include/linux/sunrpc/xprt.h | 7 +++ net/sunrpc/xprt.c | 106 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 90 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8472b1c5ad2e..393e6dc6a268 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -163,6 +163,12 @@ struct rpc_xprt { tcp_offset; /* fragment offset */ unsigned long tcp_copied, /* copied to request */ tcp_flags; + /* + * Disconnection of idle sockets + */ + struct work_struct task_cleanup; + struct timer_list timer; + unsigned long last_used; /* * Send stuff @@ -202,6 +208,7 @@ int xprt_clear_backlog(struct rpc_xprt *); void xprt_sock_setbufsize(struct rpc_xprt *); #define XPRT_CONNECT 0 +#define XPRT_LOCKED 1 #define xprt_connected(xp) (test_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index aa93b824433d..84ccc9d8e05b 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -75,6 +76,7 @@ #endif #define XPRT_MAX_BACKOFF (8) +#define XPRT_IDLE_TIMEOUT (5*60*HZ) /* * Local functions @@ -139,25 +141,33 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - if (!xprt->snd_task) { - if (xprt->nocong || __xprt_get_cong(xprt, task)) { - xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } - } + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { + if (task == xprt->snd_task) + return 1; + if (task == NULL) + return 0; + goto out_sleep; } - if (xprt->snd_task != task) { - dprintk("RPC: %4d TCP write queue full\n", task->tk_pid); - task->tk_timeout = 0; - task->tk_status = -EAGAIN; - if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); - else - rpc_sleep_on(&xprt->sending, task, NULL, 
NULL); + if (xprt->nocong || __xprt_get_cong(xprt, task)) { + xprt->snd_task = task; + if (req) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } + return 1; } - return xprt->snd_task == task; + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); +out_sleep: + dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + if (req && req->rq_ntrans) + rpc_sleep_on(&xprt->resend, task, NULL, NULL); + else + rpc_sleep_on(&xprt->sending, task, NULL, NULL); + return 0; } static inline int @@ -177,15 +187,15 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) { struct rpc_task *task; - if (xprt->snd_task) + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) return; + if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) + goto out_unlock; task = rpc_wake_up_next(&xprt->resend); if (!task) { - if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) - return; task = rpc_wake_up_next(&xprt->sending); if (!task) - return; + goto out_unlock; } if (xprt->nocong || __xprt_get_cong(xprt, task)) { struct rpc_rqst *req = task->tk_rqstp; @@ -194,7 +204,12 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) req->rq_bytes_sent = 0; req->rq_ntrans++; } + return; } +out_unlock: + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); } /* @@ -203,9 +218,13 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) static void __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { - if (xprt->snd_task == task) + if (xprt->snd_task == task) { xprt->snd_task = NULL; - __xprt_lock_write_next(xprt); + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); + __xprt_lock_write_next(xprt); + } } static inline void @@ -393,6 +412,15 @@ xprt_close(struct rpc_xprt *xprt) sock_release(sock); } +static void +xprt_socket_autoclose(void *args) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)args; + + xprt_close(xprt); 
+ xprt_release_write(xprt, NULL); +} + /* * Mark a transport as disconnected */ @@ -406,6 +434,27 @@ xprt_disconnect(struct rpc_xprt *xprt) spin_unlock_bh(&xprt->sock_lock); } +/* + * Used to allow disconnection when we've been idle + */ +static void +xprt_init_autodisconnect(unsigned long data) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)data; + + spin_lock(&xprt->sock_lock); + if (!list_empty(&xprt->recv) || xprt->shutdown) + goto out_abort; + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) + goto out_abort; + spin_unlock(&xprt->sock_lock); + /* Let keventd close the socket */ + schedule_work(&xprt->task_cleanup); + return; +out_abort: + spin_unlock(&xprt->sock_lock); +} + /* * Attempt to connect a TCP socket. * @@ -1254,6 +1303,8 @@ xprt_reserve(struct rpc_task *task) spin_lock(&xprt->xprt_lock); do_xprt_reserve(task); spin_unlock(&xprt->xprt_lock); + if (task->tk_rqstp) + del_timer_sync(&xprt->timer); } } @@ -1333,6 +1384,9 @@ xprt_release(struct rpc_task *task) __xprt_put_cong(xprt, req); if (!list_empty(&req->rq_list)) list_del(&req->rq_list); + xprt->last_used = jiffies; + if (list_empty(&xprt->recv) && !xprt->shutdown) + mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); spin_unlock_bh(&xprt->sock_lock); task->tk_rqstp = NULL; memset(req, 0, sizeof(*req)); /* mark unused */ @@ -1403,6 +1457,11 @@ xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) init_waitqueue_head(&xprt->cong_wait); INIT_LIST_HEAD(&xprt->recv); + INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); + init_timer(&xprt->timer); + xprt->timer.function = xprt_init_autodisconnect; + xprt->timer.data = (unsigned long) xprt; + xprt->last_used = jiffies; /* Set timeout parameters */ if (to) { @@ -1583,6 +1642,7 @@ xprt_shutdown(struct rpc_xprt *xprt) rpc_wake_up(&xprt->backlog); if (waitqueue_active(&xprt->cong_wait)) wake_up(&xprt->cong_wait); + del_timer_sync(&xprt->timer); } /* -- cgit v1.2.3 From 5bb0bc7c89ec7d694cd77487fe26f004ee0d0bb7 Mon Sep 
17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 17:00:45 +0100 Subject: NFSv4: Atomic open(). Fixes races w.r.t. opening files. --- fs/nfs/dir.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/inode.c | 54 +++++++++++++++++-- fs/nfs/nfs3proc.c | 2 + fs/nfs/nfs4proc.c | 134 ++++++++++++++++++++++++++++++++-------------- fs/nfs/nfs4state.c | 2 - fs/nfs/proc.c | 2 + include/linux/nfs_fs.h | 6 +++ include/linux/nfs_xdr.h | 2 + 8 files changed, 295 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0796bd6a3ac0..1adc5bfea4f6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -72,6 +72,26 @@ struct inode_operations nfs_dir_inode_operations = { .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V4 + +static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); +struct inode_operations nfs4_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_atomic_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +#endif /* CONFIG_NFS_V4 */ + /* * Open file */ @@ -670,7 +690,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru goto out; error = -ENOMEM; - dentry->d_op = &nfs_dentry_operations; + dentry->d_op = NFS_PROTO(dir)->dentry_ops; lock_kernel(); @@ -702,6 +722,119 @@ out: return ERR_PTR(error); } +#ifdef CONFIG_NFS_V4 +static int nfs_open_revalidate(struct dentry *, struct nameidata *); + +struct dentry_operations nfs4_dentry_operations = { + .d_revalidate = nfs_open_revalidate, + .d_delete = nfs_dentry_delete, + .d_iput = nfs_dentry_iput, +}; + +static int is_atomic_open(struct inode *dir, struct nameidata *nd) +{ + if (!nd) + return 0; + /* Check that we are indeed trying to open this file */ + if ((nd->flags & 
LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_OPEN)) + return 0; + /* NFS does not (yet) have a stateful open for directories */ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? */ + if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; +} + +static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = NULL; + int error = 0; + + /* Check that we are indeed trying to open this file */ + if (!is_atomic_open(dir, nd)) + goto no_open; + + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { + error = -ENAMETOOLONG; + goto out; + } + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ + if (nd->intent.open.flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ + lock_kernel(); + inode = nfs4_atomic_open(dir, dentry, nd); + unlock_kernel(); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + switch (error) { + /* Make a negative dentry */ + case -ENOENT: + inode = NULL; + break; + /* This turned out not to be a regular file */ + case -ELOOP: + if (!(nd->intent.open.flags & O_NOFOLLOW)) + goto no_open; + /* case -EISDIR: */ + /* case -EINVAL: */ + default: + goto out; + } + } +no_entry: + d_add(dentry, inode); + nfs_renew_times(dentry); +out: + BUG_ON(error > 0); + return ERR_PTR(error); +no_open: + return nfs_lookup(dir, dentry, nd); +} + +static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = NULL; + struct inode *inode = dentry->d_inode; + int openflags, ret = 0; + + /* NFS only supports OPEN for regular files */ + if (inode && !S_ISREG(inode->i_mode)) + goto no_open; + parent = dget_parent(dentry); + if (!is_atomic_open(parent->d_inode, nd)) + goto no_open; + openflags = nd->intent.open.flags; + if (openflags & O_CREAT) { + /* If this is a negative dentry, just drop it */ + if (!inode) + goto out; + /* If 
this is exclusive open, just revalidate */ + if (openflags & O_EXCL) + goto no_open; + } + /* We can't create new files, or truncate existing ones here */ + openflags &= ~(O_CREAT|O_TRUNC); + + lock_kernel(); + ret = nfs4_open_revalidate(parent->d_inode, dentry, openflags); + unlock_kernel(); +out: + dput(parent); + if (!ret) + d_drop(dentry); + return ret; +no_open: + dput(parent); + return nfs_lookup_revalidate(dentry, nd); +} +#endif /* CONFIG_NFSV4 */ + static inline int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) { @@ -1306,6 +1439,9 @@ nfs_permission(struct inode *inode, int mask, struct nameidata *nd) /* We only need to check permissions on file open() and access() */ if (!nd || !(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS))) return 0; + /* NFSv4 has atomic_open... */ + if (NFS_PROTO(inode)->version > 3 && (nd->flags & LOOKUP_OPEN)) + return 0; } lock_kernel(); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 767cd32e7a36..276f3a10298b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -303,7 +303,6 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) server = NFS_SB(sb); sb->s_magic = NFS_SUPER_MAGIC; - sb->s_op = &nfs_sops; /* Did getting the root inode fail? 
*/ if (nfs_get_root(&root_inode, authflavor, sb, &server->fh) < 0) @@ -312,7 +311,7 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &nfs_dentry_operations; + sb->s_root->d_op = server->rpc_ops->dentry_ops; /* Get some general file system info */ if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) { @@ -513,6 +512,7 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) goto out_shutdown; } + sb->s_op = &nfs_sops; err = nfs_sb_init(sb, authflavor); if (err != 0) goto out_noinit; @@ -745,7 +745,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_data.a_ops = &nfs_file_aops; inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &nfs_dir_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) @@ -837,7 +837,12 @@ printk("nfs_setattr: revalidate failed, error=%d\n", error); filemap_fdatawait(inode->i_mapping); if (error) goto out; + /* Optimize away unnecessary truncates */ + if ((attr->ia_valid & ATTR_SIZE) && i_size_read(inode) == attr->ia_size) + attr->ia_valid &= ~ATTR_SIZE; } + if (!attr->ia_valid) + goto out; error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error) @@ -1357,6 +1362,48 @@ static struct file_system_type nfs_fs_type = { #ifdef CONFIG_NFS_V4 +static void nfs4_clear_inode(struct inode *); + +static struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .delete_inode = nfs_delete_inode, + .put_super = nfs_put_super, + .statfs = nfs_statfs, + .clear_inode = nfs4_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, +}; + +/* + * Clean out any remaining NFSv4 state that 
might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). + */ +static void nfs4_clear_inode(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + while (!list_empty(&nfsi->open_states)) { + struct nfs4_state *state; + + state = list_entry(nfsi->open_states.next, + struct nfs4_state, + inode_states); + dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", + __FUNCTION__, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + state); + list_del(&state->inode_states); + nfs4_put_open_state(state); + } + /* Now call standard NFS clear_inode() code */ + nfs_clear_inode(inode); +} + + static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) { struct nfs_server *server; @@ -1481,6 +1528,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, if ((server->idmap = nfs_idmap_new(server)) == NULL) printk(KERN_WARNING "NFS: couldn't start IDmap\n"); + sb->s_op = &nfs4_sops; err = nfs_sb_init(sb, authflavour); if (err == 0) return 0; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 0ca8bbc17e13..e5b2ad2f8623 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -898,6 +898,8 @@ nfs3_request_compatible(struct nfs_page *req, struct file *filp, struct page *pa struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f617d0bf5313..a6ac022add44 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -45,6 +45,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_PROC @@ -509,6 +510,9 @@ nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) return status; } +/* + * Returns an nfs4_state + an referenced inode + */ struct nfs4_state * 
nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) { @@ -617,19 +621,23 @@ retry: up(&sp->so_sema); nfs4_put_state_owner(sp); - iput(inode); return state; out_up: up(&sp->so_sema); nfs4_put_state_owner(sp); - if (state) + if (state) { nfs4_put_open_state(state); - if (inode) + state = NULL; + } + if (inode) { iput(inode); + inode = NULL; + } status = nfs4_handle_error(server, status); if (!status) goto retry; + BUG_ON(status < -1000 || status > 0); out: return ERR_PTR(status); } @@ -718,6 +726,56 @@ nfs4_do_close(struct inode *inode, struct nfs4_state *state) return status; } +struct inode * +nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct iattr attr; + struct rpc_cred *cred; + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; + BUG_ON(nd->intent.open.flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; + return state->inode; +} + +int +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +{ + struct rpc_cred *cred; + struct nfs4_state *state; + struct inode *inode; + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); + put_rpccred(cred); + if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) + return 1; + if (IS_ERR(state)) + return 0; + inode = state->inode; + if (inode == dentry->d_inode) { + iput(inode); + return 1; + } + d_drop(dentry); + nfs4_put_open_state(state); + iput(inode); + return 0; +} + static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct 
nfs_fattr *fattr) @@ -808,28 +866,39 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct inode * inode = dentry->d_inode; int size_change = sattr->ia_valid & ATTR_SIZE; struct nfs4_state *state = NULL; - int status; + int need_iput = 0; + int status; fattr->valid = 0; if (size_change) { - struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_do_open(dentry->d_parent->d_inode, + state = nfs4_find_state_bypid(inode, current->pid); + + if (!state) { + struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + state = nfs4_do_open(dentry->d_parent->d_inode, &dentry->d_name, FMODE_WRITE, NULL, cred); - put_rpccred(cred); + put_rpccred(cred); + need_iput = 1; + } if (IS_ERR(state)) return PTR_ERR(state); if (state->inode != inode) { - printk(KERN_WARNING "nfs: raced in setattr, returning -EIO\n"); - nfs4_put_open_state(state); - return -EIO; + printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); + status = -EIO; + goto out; } } status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); - if (state) +out: + if (state) { + inode = state->inode; nfs4_put_open_state(state); + if (need_iput) + iput(inode); + } return status; } @@ -1085,18 +1154,18 @@ nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, state = nfs4_do_open(dir, name, flags, sattr, cred); put_rpccred(cred); if (!IS_ERR(state)) { - inode = igrab(state->inode); + inode = state->inode; if (flags & O_EXCL) { struct nfs_fattr fattr; int status; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(inode), sattr, state); if (status != 0) { + nfs4_put_open_state(state); iput(inode); inode = ERR_PTR(status); } } - nfs4_put_open_state(state); } else inode = (struct inode *)state; return inode; @@ -1672,43 +1741,28 @@ static int nfs4_proc_file_open(struct inode *inode, struct file *filp) { struct dentry *dentry = filp->f_dentry; - struct 
inode *dir = dentry->d_parent->d_inode; - struct rpc_cred *cred; struct nfs4_state *state; - int flags = filp->f_flags; - int status = 0; dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", (int)dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, (int)dentry->d_name.len, dentry->d_name.name); - if ((flags + 1) & O_ACCMODE) - flags++; - - lock_kernel(); -/* -* We have already opened the file "O_EXCL" in nfs4_proc_create!! -* This ugliness will go away with lookup-intent... -*/ - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_do_open(dir, &dentry->d_name, flags, NULL, cred); - if (IS_ERR(state)) { - status = PTR_ERR(state); - state = NULL; - } else if (filp->f_mode & FMODE_WRITE) - nfs_set_mmcred(inode, cred); - if (inode != filp->f_dentry->d_inode) { + /* Find our open stateid */ + state = nfs4_find_state_bypid(inode, current->pid); + if (state == NULL) { printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - status = -EIO; /* ERACE actually */ - nfs4_put_open_state(state); - state = NULL; + return -EIO; /* ERACE actually */ + } + nfs4_put_open_state(state); + if (filp->f_mode & FMODE_WRITE) { + lock_kernel(); + nfs_set_mmcred(inode, state->owner->so_cred); + unlock_kernel(); } filp->private_data = state; - put_rpccred(cred); - unlock_kernel(); - return status; + return 0; } /* @@ -1922,6 +1976,8 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp) struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ + .dentry_ops = &nfs4_dentry_operations, + .dir_inode_ops = &nfs4_dir_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 36f3f7e4c0ef..333daf5f48e7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -349,7 +349,6 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) atomic_inc(&owner->so_count); list_add(&state->inode_states, 
&nfsi->open_states); state->inode = inode; - atomic_inc(&inode->i_count); spin_unlock(&inode->i_lock); } else { spin_unlock(&inode->i_lock); @@ -384,7 +383,6 @@ nfs4_put_open_state(struct nfs4_state *state) } while (!status); } up(&owner->so_sema); - iput(inode); nfs4_free_open_state(state); nfs4_put_state_owner(owner); } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 6570e719ee54..79f18e4cfb81 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -656,6 +656,8 @@ nfs_request_compatible(struct nfs_page *req, struct file *filp, struct page *pag struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 3efc91b200e6..0605e9c63026 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -558,6 +558,9 @@ struct nfs4_state { }; +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; + /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); @@ -566,6 +569,8 @@ extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); +extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs4_client *); @@ -581,6 +586,7 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * 
nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); +extern struct nfs4_state *nfs4_find_state_bypid(struct inode *, pid_t); extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); extern int nfs4_handle_error(struct nfs_server *, int); extern void nfs4_schedule_state_recovery(struct nfs4_client *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 242b50436d92..4393ae7c305d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -637,6 +637,8 @@ struct nfs_page; */ struct nfs_rpc_ops { int version; /* Protocol version */ + struct dentry_operations *dentry_ops; + struct inode_operations *dir_inode_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); -- cgit v1.2.3 From c04e88dbd89ec891a252ffe09378310dd1d18546 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 17:01:32 +0100 Subject: NFSv4: Share open_owner structs between several different processes. Reduces the load on the server. 
--- fs/nfs/nfs4proc.c | 80 +++++++++++++++++++++++++++----- fs/nfs/nfs4state.c | 118 ++++++++++++++++++++++++++++++++++++++++++------ fs/nfs/nfs4xdr.c | 82 +++++++++++++++++++++++++++++++++ include/linux/nfs4.h | 1 + include/linux/nfs_fs.h | 7 ++- include/linux/nfs_xdr.h | 1 + 6 files changed, 263 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a6ac022add44..10f67569de73 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -616,8 +616,13 @@ retry: memcpy(&state->stateid, &oc_res.stateid, sizeof(state->stateid)); } else memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + spin_lock(&inode->i_lock); + if (flags & FMODE_READ) + state->nreaders++; + if (flags & FMODE_WRITE) + state->nwriters++; state->state |= flags & (FMODE_READ|FMODE_WRITE); - state->pid = current->pid; + spin_unlock(&inode->i_lock); up(&sp->so_sema); nfs4_put_state_owner(sp); @@ -634,6 +639,21 @@ out_up: iput(inode); inode = NULL; } + /* NOTE: BAD_SEQID means the server and client disagree about the + * book-keeping w.r.t. state-changing operations + * (OPEN/CLOSE/LOCK/LOCKU...) + * It is actually a sign of a bug on the client or on the server. + * + * If we receive a BAD_SEQID error in the particular case of + * doing an OPEN, we assume that nfs4_increment_seqid() will + * have unhashed the old state_owner for us, and that we can + * therefore safely retry using a new one. We should still warn + * the user though... + */ + if (status == -NFS4ERR_BAD_SEQID) { + printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); + goto retry; + } status = nfs4_handle_error(server, status); if (!status) goto retry; @@ -722,6 +742,36 @@ nfs4_do_close(struct inode *inode, struct nfs4_state *state) * the state_owner. 
we keep this around to process errors */ nfs4_increment_seqid(status, sp); + if (!status) + memcpy(&state->stateid, &res.stateid, sizeof(state->stateid)); + + return status; +} + +int +nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) +{ + struct nfs4_state_owner *sp = state->owner; + int status = 0; + struct nfs_closeargs arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, + .share_access = mode, + }; + struct nfs_closeres res = { + .status = 0, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + + memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); + status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + nfs4_increment_seqid(status, sp); + if (!status) + memcpy(&state->stateid, &res.stateid, sizeof(state->stateid)); return status; } @@ -771,7 +821,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) return 1; } d_drop(dentry); - nfs4_put_open_state(state); + nfs4_close_state(state, openflags); iput(inode); return 0; } @@ -872,15 +922,14 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, fattr->valid = 0; if (size_change) { - state = nfs4_find_state_bypid(inode, current->pid); - + struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + state = nfs4_find_state(inode, cred, FMODE_WRITE); if (!state) { - struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); state = nfs4_do_open(dentry->d_parent->d_inode, &dentry->d_name, FMODE_WRITE, NULL, cred); - put_rpccred(cred); need_iput = 1; } + put_rpccred(cred); if (IS_ERR(state)) return PTR_ERR(state); @@ -895,7 +944,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, out: if (state) { inode = state->inode; - nfs4_put_open_state(state); + nfs4_close_state(state, FMODE_WRITE); if (need_iput) iput(inode); } @@ -1161,7 +1210,7 @@ nfs4_proc_create(struct inode *dir, 
struct qstr *name, struct iattr *sattr, status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(inode), sattr, state); if (status != 0) { - nfs4_put_open_state(state); + nfs4_close_state(state, flags); iput(inode); inode = ERR_PTR(status); } @@ -1742,6 +1791,7 @@ nfs4_proc_file_open(struct inode *inode, struct file *filp) { struct dentry *dentry = filp->f_dentry; struct nfs4_state *state; + struct rpc_cred *cred; dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", (int)dentry->d_parent->d_name.len, @@ -1750,12 +1800,14 @@ nfs4_proc_file_open(struct inode *inode, struct file *filp) /* Find our open stateid */ - state = nfs4_find_state_bypid(inode, current->pid); + cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + state = nfs4_find_state(inode, cred, filp->f_mode); + put_rpccred(cred); if (state == NULL) { printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); return -EIO; /* ERACE actually */ } - nfs4_put_open_state(state); + nfs4_close_state(state, filp->f_mode); if (filp->f_mode & FMODE_WRITE) { lock_kernel(); nfs_set_mmcred(inode, state->owner->so_cred); @@ -1774,7 +1826,7 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp) struct nfs4_state *state = (struct nfs4_state *)filp->private_data; if (state) - nfs4_put_open_state(state); + nfs4_close_state(state, filp->f_mode); return 0; } @@ -1816,6 +1868,9 @@ nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) rpc_delay(task, NFS4_POLL_RETRY_TIME); task->tk_status = 0; return -EAGAIN; + case -NFS4ERR_OLD_STATEID: + task->tk_status = 0; + return -EAGAIN; } return 0; } @@ -1892,6 +1947,9 @@ nfs4_handle_error(struct nfs_server *server, int errorcode) case -NFS4ERR_DELAY: ret = nfs4_delay(server->client); break; + case -NFS4ERR_OLD_STATEID: + ret = 0; + break; default: if (errorcode <= -1000) { printk(KERN_WARNING "%s could not handle NFSv4 error %d\n", diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 333daf5f48e7..7a078a42eec3 100644 --- 
a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -188,6 +188,23 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) return sp; } +static struct nfs4_state_owner * +nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) +{ + struct nfs4_state_owner *sp, *res = NULL; + + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + if (sp->so_cred != cred) + continue; + atomic_inc(&sp->so_count); + /* Move to the head of the list */ + list_move(&sp->so_list, &clp->cl_state_owners); + res = sp; + break; + } + return res; +} + /* * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to * create a new state_owner. @@ -208,6 +225,15 @@ nfs4_alloc_state_owner(void) return sp; } +static void +nfs4_unhash_state_owner(struct nfs4_state_owner *sp) +{ + struct nfs4_client *clp = sp->so_client; + spin_lock(&clp->cl_lock); + list_del_init(&sp->so_list); + spin_unlock(&clp->cl_lock); +} + struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { @@ -217,7 +243,9 @@ nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) get_rpccred(cred); new = nfs4_alloc_state_owner(); spin_lock(&clp->cl_lock); - sp = nfs4_client_grab_unused(clp, cred); + sp = nfs4_find_state_owner(clp, cred); + if (sp == NULL) + sp = nfs4_client_grab_unused(clp, cred); if (sp == NULL && new != NULL) { list_add(&new->so_list, &clp->cl_state_owners); new->so_client = clp; @@ -248,6 +276,8 @@ nfs4_put_state_owner(struct nfs4_state_owner *sp) return; if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) goto out_free; + if (list_empty(&sp->so_list)) + goto out_free; list_move(&sp->so_list, &clp->cl_unused); clp->cl_nunused++; spin_unlock(&clp->cl_lock); @@ -269,24 +299,38 @@ nfs4_alloc_open_state(void) state = kmalloc(sizeof(*state), GFP_KERNEL); if (!state) return NULL; - state->pid = current->pid; state->state = 0; + state->nreaders = 0; + state->nwriters = 0; memset(state->stateid.data, 0, 
sizeof(state->stateid.data)); atomic_set(&state->count, 1); return state; } static struct nfs4_state * -__nfs4_find_state_bypid(struct inode *inode, pid_t pid) +__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state *state; + mode &= (FMODE_READ|FMODE_WRITE); list_for_each_entry(state, &nfsi->open_states, inode_states) { - if (state->pid == pid) { - atomic_inc(&state->count); - return state; - } + if (state->owner->so_cred != cred) + continue; + if ((mode & FMODE_READ) != 0 && state->nreaders == 0) + continue; + if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0) + continue; + if ((state->state & mode) != mode) + continue; + /* Add the state to the head of the inode's list */ + list_move(&state->inode_states, &nfsi->open_states); + atomic_inc(&state->count); + if (mode & FMODE_READ) + state->nreaders++; + if (mode & FMODE_WRITE) + state->nwriters++; + return state; } return NULL; } @@ -298,7 +342,12 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) struct nfs4_state *state; list_for_each_entry(state, &nfsi->open_states, inode_states) { + /* Is this in the process of being freed? 
*/ + if (state->nreaders == 0 && state->nwriters == 0) + continue; if (state->owner == owner) { + /* Add the state to the head of the inode's list */ + list_move(&state->inode_states, &nfsi->open_states); atomic_inc(&state->count); return state; } @@ -307,16 +356,12 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) } struct nfs4_state * -nfs4_find_state_bypid(struct inode *inode, pid_t pid) +nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) { - struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state *state; spin_lock(&inode->i_lock); - state = __nfs4_find_state_bypid(inode, pid); - /* Add the state to the tail of the inode's list */ - if (state) - list_move_tail(&state->inode_states, &nfsi->open_states); + state = __nfs4_find_state(inode, cred, mode); spin_unlock(&inode->i_lock); return state; } @@ -387,6 +432,50 @@ nfs4_put_open_state(struct nfs4_state *state) nfs4_put_state_owner(owner); } +void +nfs4_close_state(struct nfs4_state *state, mode_t mode) +{ + struct inode *inode = state->inode; + struct nfs4_state_owner *owner = state->owner; + int newstate; + int status = 0; + + down(&owner->so_sema); + /* Protect against nfs4_find_state() */ + spin_lock(&inode->i_lock); + if (mode & FMODE_READ) + state->nreaders--; + if (mode & FMODE_WRITE) + state->nwriters--; + if (state->nwriters == 0 && state->nreaders == 0) + list_del_init(&state->inode_states); + spin_unlock(&inode->i_lock); + do { + newstate = 0; + if (state->state == 0) + break; + if (state->nreaders) + newstate |= FMODE_READ; + if (state->nwriters) + newstate |= FMODE_WRITE; + if (state->state == newstate) + break; + if (newstate != 0) + status = nfs4_do_downgrade(inode, state, newstate); + else + status = nfs4_do_close(inode, state); + if (!status) { + state->state = newstate; + break; + } + up(&owner->so_sema); + status = nfs4_handle_error(NFS_SERVER(inode), status); + down(&owner->so_sema); + } while (!status); + up(&owner->so_sema); + 
nfs4_put_open_state(state); +} + /* * Called with sp->so_sema held. * @@ -399,6 +488,9 @@ nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) { if (status == NFS_OK || seqid_mutating_err(-status)) sp->so_seqid++; + /* If the server returns BAD_SEQID, unhash state_owner here */ + if (status == -NFS4ERR_BAD_SEQID) + nfs4_unhash_state_owner(sp); } static int reclaimer(void *); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 16296618a231..7a2d241e50e8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -176,6 +176,14 @@ static int nfs_stat_to_errno(int); op_decode_hdr_maxsz + \ 4 + 5 + 2 + 3 + \ decode_getattr_maxsz +#define NFS4_enc_open_downgrade_sz \ + compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 7 +#define NFS4_dec_open_downgrade_sz \ + compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 4 #define NFS4_enc_close_sz compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 5 @@ -711,6 +719,22 @@ encode_open_reclaim(struct xdr_stream *xdr, struct nfs_open_reclaimargs *arg) return 0; } +static int +encode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(16+sizeof(arg->stateid.data)); + WRITE32(OP_OPEN_DOWNGRADE); + WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITE32(arg->seqid); + WRITE32(arg->share_access); + /* No deny modes */ + WRITE32(0); + + return 0; +} + static int encode_putfh(struct xdr_stream *xdr, struct nfs_fh *fh) { @@ -1129,6 +1153,27 @@ out: return status; } +/* + * Encode an OPEN_DOWNGRADE request + */ +static int +nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open_downgrade(&xdr, args); 
+out: + return status; +} /* * Encode a READ request @@ -2001,6 +2046,19 @@ decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) return 0; } +static int +decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); + if (status) + return status; + READ_BUF(sizeof(res->stateid.data)); + COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + return 0; +} static int decode_putfh(struct xdr_stream *xdr) @@ -2377,6 +2435,29 @@ decode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqs DECODE_TAIL; } + +/* + * Decode OPEN_DOWNGRADE response + */ +static int +nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open_downgrade(&xdr, res); +out: + return status; +} + /* * END OF "GENERIC" DECODE ROUTINES. 
*/ @@ -2827,6 +2908,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(OPEN, enc_open, dec_open), PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), PROC(OPEN_RECLAIM, enc_open_reclaim, dec_open_reclaim), + PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), PROC(CLOSE, enc_close, dec_close), PROC(SETATTR, enc_setattr, dec_setattr), PROC(FSINFO, enc_fsinfo, dec_fsinfo), diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index a6f2d563b605..4a61a4682718 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -290,6 +290,7 @@ enum { NFSPROC4_CLNT_OPEN, NFSPROC4_CLNT_OPEN_CONFIRM, NFSPROC4_CLNT_OPEN_RECLAIM, + NFSPROC4_CLNT_OPEN_DOWNGRADE, NFSPROC4_CLNT_CLOSE, NFSPROC4_CLNT_SETATTR, NFSPROC4_CLNT_FSINFO, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0605e9c63026..cfbb7ff1aa89 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -549,10 +549,11 @@ struct nfs4_state { struct nfs4_state_owner *owner; /* Pointer to the open owner */ struct inode *inode; /* Pointer to the inode */ - pid_t pid; /* Thread that called OPEN */ nfs4_stateid stateid; + unsigned int nreaders; + unsigned int nwriters; int state; /* State on the server (R,W, or RW) */ atomic_t count; }; @@ -568,6 +569,7 @@ extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *); extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); +int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode); extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); @@ -586,7 +588,8 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern 
struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); -extern struct nfs4_state *nfs4_find_state_bypid(struct inode *, pid_t); +extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); extern int nfs4_handle_error(struct nfs_server *, int); extern void nfs4_schedule_state_recovery(struct nfs4_client *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4393ae7c305d..21827ad1a71e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -153,6 +153,7 @@ struct nfs_closeargs { struct nfs_fh * fh; nfs4_stateid stateid; __u32 seqid; + __u32 share_access; }; struct nfs_closeres { -- cgit v1.2.3 From 1f37cd43d9e866e99f81ffe6141b49cc6f83f619 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 17:02:21 +0100 Subject: NFSv4: Fix a bug which was causing Oopses if the client was mounting more than one partition from the same server. 
--- fs/nfs/idmap.c | 44 ++++++++++++++++++++++---------------------- fs/nfs/inode.c | 15 +++++---------- fs/nfs/nfs4state.c | 2 ++ fs/nfs/nfs4xdr.c | 8 ++++---- include/linux/nfs_fs.h | 5 +++++ include/linux/nfs_fs_sb.h | 1 - include/linux/nfs_idmap.h | 12 ++++++------ 7 files changed, 44 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index bd1d1335561c..1d5d8a9dba1e 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -88,23 +88,27 @@ static struct rpc_pipe_ops idmap_upcall_ops = { .destroy_msg = idmap_pipe_destroy_msg, }; -void * -nfs_idmap_new(struct nfs_server *server) +void +nfs_idmap_new(struct nfs4_client *clp) { struct idmap *idmap; + if (clp->cl_idmap != NULL) + return; if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) - return (NULL); + return; memset(idmap, 0, sizeof(*idmap)); snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), - "%s/idmap", server->client->cl_pathname); + "%s/idmap", clp->cl_rpcclient->cl_pathname); idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, idmap, &idmap_upcall_ops, 0); - if (IS_ERR(idmap->idmap_dentry)) - goto err_free; + if (IS_ERR(idmap->idmap_dentry)) { + kfree(idmap); + return; + } init_MUTEX(&idmap->idmap_lock); init_MUTEX(&idmap->idmap_im_lock); @@ -112,22 +116,18 @@ nfs_idmap_new(struct nfs_server *server) idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; - return (idmap); - - err_free: - kfree(idmap); - return (NULL); + clp->cl_idmap = idmap; } void -nfs_idmap_delete(struct nfs_server *server) +nfs_idmap_delete(struct nfs4_client *clp) { - struct idmap *idmap = server->idmap; + struct idmap *idmap = clp->cl_idmap; if (!idmap) return; rpc_unlink(idmap->idmap_path); - server->idmap = NULL; + clp->cl_idmap = NULL; kfree(idmap); } @@ -468,29 +468,29 @@ static unsigned int fnvhash32(const void *buf, size_t buflen) return (hash); } -int nfs_map_name_to_uid(struct nfs_server *server, const char *name, size_t 
namelen, __u32 *uid) +int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) { - struct idmap *idmap = server->idmap; + struct idmap *idmap = clp->cl_idmap; return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); } -int nfs_map_group_to_gid(struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) +int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) { - struct idmap *idmap = server->idmap; + struct idmap *idmap = clp->cl_idmap; return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); } -int nfs_map_uid_to_name(struct nfs_server *server, __u32 uid, char *buf) +int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf) { - struct idmap *idmap = server->idmap; + struct idmap *idmap = clp->cl_idmap; return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); } -int nfs_map_gid_to_group(struct nfs_server *server, __u32 uid, char *buf) +int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf) { - struct idmap *idmap = server->idmap; + struct idmap *idmap = clp->cl_idmap; return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 276f3a10298b..f23707e5ecb2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -158,11 +158,6 @@ nfs_put_super(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); -#ifdef CONFIG_NFS_V4 - if (server->idmap != NULL) - nfs_idmap_delete(server); -#endif /* CONFIG_NFS_V4 */ - nfs4_renewd_prepare_shutdown(server); if (server->client != NULL) @@ -1494,6 +1489,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp->cl_rpcclient = clnt; clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); + nfs_idmap_new(clp); } if (list_empty(&clp->cl_superblocks)) clear_bit(NFS4CLNT_OK, &clp->cl_state); @@ -1507,6 +1503,10 @@ static int 
nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, printk(KERN_WARNING "NFS: cannot create RPC client.\n"); goto out_remove_list; } + if (server->nfs4_state->cl_idmap == NULL) { + printk(KERN_WARNING "NFS: failed to create idmapper.\n"); + goto out_shutdown; + } clnt->cl_intr = (server->flags & NFS4_MOUNT_INTR) ? 1 : 0; clnt->cl_softrtry = (server->flags & NFS4_MOUNT_SOFT) ? 1 : 0; @@ -1525,16 +1525,11 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, goto out_shutdown; } - if ((server->idmap = nfs_idmap_new(server)) == NULL) - printk(KERN_WARNING "NFS: couldn't start IDmap\n"); - sb->s_op = &nfs4_sops; err = nfs_sb_init(sb, authflavour); if (err == 0) return 0; rpciod_down(); - if (server->idmap != NULL) - nfs_idmap_delete(server); out_shutdown: rpc_shutdown_client(server->client); out_remove_list: diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7a078a42eec3..0b391c7f4ce4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #define OPENOWNER_POOL_SIZE 8 @@ -124,6 +125,7 @@ nfs4_free_client(struct nfs4_client *clp) BUG_ON(!list_empty(&clp->cl_state_owners)); if (clp->cl_cred) put_rpccred(clp->cl_cred); + nfs_idmap_delete(clp); if (clp->cl_rpcclient) rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7a2d241e50e8..3a372266bb03 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -328,7 +328,7 @@ encode_attrs(struct xdr_stream *xdr, struct iattr *iap, if (iap->ia_valid & ATTR_MODE) len += 4; if (iap->ia_valid & ATTR_UID) { - owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name); + owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name); if (owner_namelen < 0) { printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", iap->ia_uid); @@ -340,7 +340,7 @@ encode_attrs(struct xdr_stream *xdr, struct iattr *iap, len += 4 + 
(XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { - owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group); + owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group); if (owner_grouplen < 0) { printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", iap->ia_gid); @@ -1677,7 +1677,7 @@ decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, } READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); - if ((status = nfs_map_name_to_uid(server, (char *)p, dummy32, + if ((status = nfs_map_name_to_uid(server->nfs4_state, (char *)p, dummy32, &nfp->uid)) < 0) { dprintk("read_attrs: name-to-uid mapping failed!\n"); nfp->uid = -2; @@ -1694,7 +1694,7 @@ decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, } READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); - if ((status = nfs_map_group_to_gid(server, (char *)p, dummy32, + if ((status = nfs_map_group_to_gid(server->nfs4_state, (char *)p, dummy32, &nfp->gid)) < 0) { dprintk("read_attrs: group-to-gid mapping failed!\n"); nfp->gid = -2; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index cfbb7ff1aa89..512d9203905f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -438,6 +438,8 @@ extern void * nfs_root_data(void); #ifdef CONFIG_NFS_V4 +struct idmap; + /* * In a seqid-mutating op, this macro controls which error return * values trigger incrementation of the seqid. @@ -506,6 +508,9 @@ struct nfs4_client { wait_queue_head_t cl_waitq; struct rpc_wait_queue cl_rpcwaitq; + /* idmapper */ + struct idmap * cl_idmap; + /* Our own IP address, as a null-terminated string. * This is used to generate the clientid, and the callback address. 
*/ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5f0b0ce3aa2c..1b5f7e130502 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -38,7 +38,6 @@ struct nfs_server { struct list_head nfs4_siblings; /* List of other nfs_server structs * that share the same clientid */ - void *idmap; #endif }; diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index c95076e5941b..50df56b5a01e 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -60,13 +60,13 @@ struct idmap_msg { }; #ifdef __KERNEL__ -void *nfs_idmap_new(struct nfs_server *); -void nfs_idmap_delete(struct nfs_server *); +void nfs_idmap_new(struct nfs4_client *); +void nfs_idmap_delete(struct nfs4_client *); -int nfs_map_name_to_uid(struct nfs_server *, const char *, size_t, __u32 *); -int nfs_map_group_to_gid(struct nfs_server *, const char *, size_t, __u32 *); -int nfs_map_uid_to_name(struct nfs_server *, __u32, char *); -int nfs_map_gid_to_group(struct nfs_server *, __u32, char *); +int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *); +int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *); +int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *); +int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *); #endif /* __KERNEL__ */ #endif /* NFS_IDMAP_H */ -- cgit v1.2.3 From 3f1990d30fe3d6e62d41d0224c27855b59517b8b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Feb 2004 17:03:03 +0100 Subject: NFSv4: Add support for POSIX file locking. 
--- fs/nfs/file.c | 23 ++-- fs/nfs/nfs3proc.c | 8 ++ fs/nfs/nfs4proc.c | 290 +++++++++++++++++++++++++++++++++++++++++--- fs/nfs/nfs4state.c | 174 +++++++++++++++++++++++++- fs/nfs/nfs4xdr.c | 309 ++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/proc.c | 8 ++ include/linux/nfs4.h | 3 + include/linux/nfs_fs.h | 37 +++++- include/linux/nfs_page.h | 1 + include/linux/nfs_xdr.h | 65 +++++++++- 10 files changed, 878 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index b000db0f1b23..c32e7a2575d3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -278,21 +277,17 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl) if (!inode) return -EINVAL; - /* This will be in a forthcoming patch. */ - if (NFS_PROTO(inode)->version == 4) { - printk(KERN_INFO "NFS: file locking over NFSv4 is not yet supported\n"); - return -EIO; - } - /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; - /* Fake OK code if mounted without NLM support */ - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { - if (IS_GETLK(cmd)) - status = LOCK_USE_CLNT; - goto out_ok; + if (NFS_PROTO(inode)->version != 4) { + /* Fake OK code if mounted without NLM support */ + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { + if (IS_GETLK(cmd)) + status = LOCK_USE_CLNT; + goto out_ok; + } } /* @@ -302,7 +297,7 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl) * Not sure whether that would be unique, though, or whether * that would break in other places. 
*/ - if (!fl->fl_owner || (fl->fl_flags & FL_POSIX) != FL_POSIX) + if (!fl->fl_owner || !(fl->fl_flags & FL_POSIX)) return -ENOLCK; /* @@ -322,7 +317,7 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl) return status; lock_kernel(); - status = nlmclnt_proc(inode, cmd, fl); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); unlock_kernel(); if (status < 0) return status; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index e5b2ad2f8623..a27b48f411cb 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #define NFSDBG_FACILITY NFSDBG_PROC @@ -896,6 +897,12 @@ nfs3_request_compatible(struct nfs_page *req, struct file *filp, struct page *pa return 1; } +static int +nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -931,4 +938,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .file_release = nfs_release, .request_init = nfs3_request_init, .request_compatible = nfs3_request_compatible, + .lock = nfs3_proc_lock, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 10f67569de73..3d509bd94b0e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -598,9 +598,7 @@ retry: .fh = &o_res.fh, .seqid = sp->so_seqid, }; - struct nfs_open_confirmres oc_res = { - .status = 0, - }; + struct nfs_open_confirmres oc_res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], .rpc_argp = &oc_arg, @@ -692,7 +690,7 @@ retry: fattr->valid = 0; if (state) - memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); + nfs4_copy_stateid(&arg.stateid, state, 0); else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); @@ -724,9 +722,7 @@ nfs4_do_close(struct inode *inode, struct nfs4_state *state) struct nfs_closeargs arg = { .fh = NFS_FH(inode), }; - struct nfs_closeres res = { - 
.status = 0, - }; + struct nfs_closeres res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], .rpc_argp = &arg, @@ -758,9 +754,7 @@ nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) .seqid = sp->so_seqid, .share_access = mode, }; - struct nfs_closeres res = { - .status = 0, - }; + struct nfs_closeres res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE], .rpc_argp = &arg, @@ -1085,7 +1079,7 @@ nfs4_proc_read(struct nfs_read_data *rdata, struct file *filp) if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&rdata->args.stateid, &state->stateid, sizeof(rdata->args.stateid)); + nfs4_copy_stateid(&rdata->args.stateid, state, rdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&rdata->args.stateid, &zero_stateid, sizeof(rdata->args.stateid)); @@ -1127,7 +1121,7 @@ nfs4_proc_write(struct nfs_write_data *wdata, struct file *filp) if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&wdata->args.stateid, &state->stateid, sizeof(wdata->args.stateid)); + nfs4_copy_stateid(&wdata->args.stateid, state, wdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&wdata->args.stateid, &zero_stateid, sizeof(wdata->args.stateid)); @@ -1163,7 +1157,7 @@ nfs4_proc_commit(struct nfs_write_data *cdata, struct file *filp) if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&cdata->args.stateid, &state->stateid, sizeof(cdata->args.stateid)); + nfs4_copy_stateid(&cdata->args.stateid, state, cdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&cdata->args.stateid, &zero_stateid, sizeof(cdata->args.stateid)); @@ -1513,7 +1507,7 @@ nfs4_restart_read(struct rpc_task *task) rpc_restart_call(task); req = nfs_list_entry(data->pages.next); if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, 
sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); } @@ -1564,8 +1558,9 @@ nfs4_proc_read_setup(struct nfs_read_data *data, unsigned int count) data->res.eof = 0; data->timestamp = jiffies; + data->lockowner = req->wb_lockowner; if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); @@ -1605,7 +1600,7 @@ nfs4_restart_write(struct rpc_task *task) rpc_restart_call(task); req = nfs_list_entry(data->pages.next); if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); } @@ -1661,8 +1656,9 @@ nfs4_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how) data->res.verf = &data->verf; data->timestamp = jiffies; + data->lockowner = req->wb_lockowner; if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); @@ -1846,6 +1842,7 @@ nfs4_request_init(struct nfs_page *req, struct file *filp) state = (struct nfs4_state *)filp->private_data; req->wb_state = state; req->wb_cred = get_rpccred(state->owner->so_cred); + req->wb_lockowner = current->files; } static int @@ -1975,6 +1972,8 @@ nfs4_request_compatible(struct nfs_page *req, struct file *filp, struct page *pa state = (struct nfs4_state *)filp->private_data; if (req->wb_state != state) return 0; + if (req->wb_lockowner != current->files) + return 0; cred = state->owner->so_cred; if 
(req->wb_cred != cred) return 0; @@ -2032,6 +2031,262 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp) return status; } +#define NFS4_LOCK_MINTIMEOUT (1 * HZ) +#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) + +/* + * sleep, with exponential backoff, and retry the LOCK operation. + */ +static unsigned long +nfs4_set_lock_task_retry(unsigned long timeout) +{ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(timeout); + timeout <<= 1; + if (timeout > NFS4_LOCK_MAXTIMEOUT) + return NFS4_LOCK_MAXTIMEOUT; + return timeout; +} + +static inline int +nfs4_lck_type(int cmd, struct file_lock *request) +{ + /* set lock type */ + switch (request->fl_type) { + case F_RDLCK: + return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT; + case F_WRLCK: + return IS_SETLKW(cmd) ? NFS4_WRITEW_LT : NFS4_WRITE_LT; + case F_UNLCK: + return NFS4_WRITE_LT; + } + BUG(); +} + +static inline uint64_t +nfs4_lck_length(struct file_lock *request) +{ + if (request->fl_end == OFFSET_MAX) + return ~(uint64_t)0; + return request->fl_end - request->fl_start + 1; +} + +int +nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lowner nlo; + struct nfs4_lock_state *lsp; + int status; + + nlo.clientid = clp->cl_clientid; + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp) + nlo.id = lsp->ls_id; + else { + spin_lock(&clp->cl_lock); + nlo.id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + } + 
arg.u.lockt = &nlo; + status = rpc_call_sync(server->client, &msg, 0); + if (!status) { + request->fl_type = F_UNLCK; + } else if (status == -NFS4ERR_DENIED) { + int64_t len, start, end; + start = res.u.denied.offset; + len = res.u.denied.length; + end = start + len - 1; + if (end < 0 || len == 0) + request->fl_end = OFFSET_MAX; + else + request->fl_end = (loff_t)end; + request->fl_start = (loff_t)start; + request->fl_type = F_WRLCK; + if (res.u.denied.type & 1) + request->fl_type = F_RDLCK; + request->fl_pid = 0; + status = 0; + } + if (lsp) + nfs4_put_lock_state(lsp); + up(&state->lock_sema); + return status; +} + +int +nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs4_lock_state *lsp; + struct nfs_locku_opargs luargs; + int status = 0; + + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_lock_seqid(status, lsp); + + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, + sizeof(lsp->ls_stateid)); + nfs4_notify_unlck(inode, request, lsp); + } + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + return status; +} + +static int +nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + 
struct nfs4_lock_state *lsp; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lock_opargs largs = { + .new_lock_owner = 0, + }; + int status; + + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp == NULL) { + struct nfs4_state_owner *owner = state->owner; + struct nfs_open_to_lock otl = { + .lock_owner.clientid = server->nfs4_state->cl_clientid, + }; + status = -ENOMEM; + lsp = nfs4_alloc_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + otl.lock_seqid = lsp->ls_seqid; + otl.lock_owner.id = lsp->ls_id; + memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); + largs.u.open_lock = &otl; + largs.new_lock_owner = 1; + arg.u.lock = &largs; + down(&owner->so_sema); + otl.open_seqid = owner->so_seqid; + status = rpc_call_sync(server->client, &msg, 0); + /* increment open_owner seqid on success, and + * seqid mutating errors */ + nfs4_increment_seqid(status, owner); + up(&owner->so_sema); + } else { + struct nfs_exist_lock el = { + .seqid = lsp->ls_seqid, + }; + memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); + largs.u.exist_lock = &el; + largs.new_lock_owner = 0; + arg.u.lock = &largs; + status = rpc_call_sync(server->client, &msg, 0); + } + /* increment seqid on success, and * seqid mutating errors*/ + nfs4_increment_lock_seqid(status, lsp); + /* save the returned stateid. 
*/ + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); + nfs4_notify_setlk(inode, request, lsp); + } else if (status == -NFS4ERR_DENIED) + status = -EAGAIN; + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + return status; +} + +static int +nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) +{ + struct nfs4_state *state; + unsigned long timeout = NFS4_LOCK_MINTIMEOUT; + int status; + + /* verify open state */ + state = (struct nfs4_state *)filp->private_data; + BUG_ON(!state); + + if (request->fl_start < 0 || request->fl_end < 0) + return -EINVAL; + + if (IS_GETLK(cmd)) + return nfs4_proc_getlk(state, F_GETLK, request); + + if (!(IS_SETLK(cmd) || IS_SETLKW(cmd))) + return -EINVAL; + + if (request->fl_type == F_UNLCK) + return nfs4_proc_unlck(state, cmd, request); + + do { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + timeout = nfs4_set_lock_task_retry(timeout); + status = -ERESTARTSYS; + if (signalled()) + break; + } while(status < 0); + + return status; +} + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, @@ -2067,6 +2322,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .file_release = nfs4_proc_file_release, .request_init = nfs4_request_init, .request_compatible = nfs4_request_compatible, + .lock = nfs4_proc_lock, }; /* diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0b391c7f4ce4..0694b2e13342 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -43,6 +43,7 @@ #include #include #include +#include #define OPENOWNER_POOL_SIZE 8 @@ -168,7 +169,7 @@ nfs4_put_client(struct nfs4_client *clp) nfs4_free_client(clp); } -static inline u32 +u32 nfs4_alloc_lockowner_id(struct nfs4_client *clp) { return clp->cl_lockowner_id ++; @@ -304,8 +305,12 @@ nfs4_alloc_open_state(void) state->state = 0; state->nreaders = 0; state->nwriters = 0; + state->flags = 0; 
memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); + INIT_LIST_HEAD(&state->lock_states); + init_MUTEX(&state->lock_sema); + rwlock_init(&state->state_lock); return state; } @@ -453,7 +458,7 @@ nfs4_close_state(struct nfs4_state *state, mode_t mode) list_del_init(&state->inode_states); spin_unlock(&inode->i_lock); do { - newstate = 0; + newstate = 0; if (state->state == 0) break; if (state->nreaders) @@ -478,6 +483,171 @@ nfs4_close_state(struct nfs4_state *state, mode_t mode) nfs4_put_open_state(state); } +/* + * Search the state->lock_states for an existing lock_owner + * that is compatible with current->files + */ +static struct nfs4_lock_state * +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *pos; + list_for_each_entry(pos, &state->lock_states, ls_locks) { + if (pos->ls_owner != fl_owner) + continue; + atomic_inc(&pos->ls_count); + return pos; + } + return NULL; +} + +struct nfs4_lock_state * +nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + read_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + read_unlock(&state->state_lock); + return lsp; +} + +/* + * Return a compatible lock_state. If no initialized lock_state structure + * exists, return an uninitialized one. 
+ * + * The caller must be holding state->lock_sema + */ +struct nfs4_lock_state * +nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + struct nfs4_client *clp = state->owner->so_client; + + lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + if (lsp == NULL) + return NULL; + lsp->ls_seqid = 0; /* arbitrary */ + lsp->ls_id = -1; + memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + atomic_set(&lsp->ls_count, 1); + lsp->ls_owner = fl_owner; + lsp->ls_parent = state; + INIT_LIST_HEAD(&lsp->ls_locks); + spin_lock(&clp->cl_lock); + lsp->ls_id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + return lsp; +} + +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. + */ +void +nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +{ + if (test_bit(LK_STATE_IN_USE, &state->flags)) { + struct nfs4_lock_state *lsp; + + lsp = nfs4_find_lock_state(state, fl_owner); + if (lsp) { + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + nfs4_put_lock_state(lsp); + return; + } + } + memcpy(dst, &state->stateid, sizeof(*dst)); +} + +/* +* Called with state->lock_sema held. +*/ +void +nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +{ + if (status == NFS_OK || seqid_mutating_err(-status)) + lsp->ls_seqid++; +} + +/* +* Check to see if the request lock (type FL_UNLK) affects the fl lock. +* +* fl and request must have the same posix owner +* +* return: +* 0 -> fl not affected by request +* 1 -> fl consumed by request +*/ + +static int +nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +{ + if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) + return 1; + return 0; +} + +/* + * Post an initialized lock_state on the state->lock_states list. 
+ */ +void +nfs4_notify_setlk(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_parent; + + if (!list_empty(&lsp->ls_locks)) + return; + write_lock(&state->state_lock); + list_add(&lsp->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * to decide to 'reap' lock state: + * 1) search i_flock for file_locks with fl.lock_state = to ls. + * 2) determine if unlock will consume found lock. + * if so, reap + * + * else, don't reap. + * + */ +void +nfs4_notify_unlck(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_parent; + struct file_lock *fl; + + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (fl->fl_owner != lsp->ls_owner) + continue; + /* Exit if we find at least one lock which is not consumed */ + if (nfs4_check_unlock(fl,request) == 0) + return; + } + + write_lock(&state->state_lock); + list_del_init(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * Release reference to lock_state, and free it if we see that + * it is no longer in use + */ +void +nfs4_put_lock_state(struct nfs4_lock_state *lsp) +{ + if (!atomic_dec_and_test(&lsp->ls_count)) + return; + if (!list_empty(&lsp->ls_locks)) + return; + kfree(lsp); +} + /* * Called with sp->so_sema held. 
* diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3a372266bb03..f0a688fa675d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -66,6 +66,10 @@ static int nfs_stat_to_errno(int); #define NFS4_MAXTAGLEN 0 #endif +/* lock,open owner id: + * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) + */ +#define owner_id_maxsz 1 + 1 #define compound_encode_hdr_maxsz 3 + (NFS4_MAXTAGLEN >> 2) #define compound_decode_hdr_maxsz 2 + (NFS4_MAXTAGLEN >> 2) #define op_encode_hdr_maxsz 1 @@ -222,6 +226,36 @@ static int nfs_stat_to_errno(int); decode_setclientid_confirm_maxsz + \ decode_putrootfh_maxsz + \ decode_fsinfo_maxsz +#define NFS4_enc_lock_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 1 + 2 + 2 + \ + 1 + 4 + 1 + 2 + \ + owner_id_maxsz +#define NFS4_dec_lock_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + \ + 2 + 2 + 1 + 2 + \ + owner_id_maxsz +#define NFS4_enc_lockt_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 2 + 2 + 2 + \ + owner_id_maxsz +#define NFS4_dec_lockt_sz NFS4_dec_lock_sz +#define NFS4_enc_locku_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 1 + 4 + 2 + 2 +#define NFS4_dec_locku_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + 4 + static struct { @@ -596,6 +630,80 @@ encode_link(struct xdr_stream *xdr, struct nfs4_link *link) return 0; } +/* + * opcode,type,reclaim,offset,length,new_lock_owner = 32 + * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 + */ +static int +encode_lock(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_lock_opargs *opargs = arg->u.lock; + + RESERVE_SPACE(32); + WRITE32(OP_LOCK); + WRITE32(arg->type); + WRITE32(opargs->reclaim); + 
WRITE64(arg->offset); + WRITE64(arg->length); + WRITE32(opargs->new_lock_owner); + if (opargs->new_lock_owner){ + struct nfs_open_to_lock *ol = opargs->u.open_lock; + + RESERVE_SPACE(40); + WRITE32(ol->open_seqid); + WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); + WRITE32(ol->lock_seqid); + WRITE64(ol->lock_owner.clientid); + WRITE32(4); + WRITE32(ol->lock_owner.id); + } + else { + struct nfs_exist_lock *el = opargs->u.exist_lock; + + RESERVE_SPACE(20); + WRITEMEM(&el->stateid, sizeof(el->stateid)); + WRITE32(el->seqid); + } + + return 0; +} + +static int +encode_lockt(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_lowner *opargs = arg->u.lockt; + + RESERVE_SPACE(40); + WRITE32(OP_LOCKT); + WRITE32(arg->type); + WRITE64(arg->offset); + WRITE64(arg->length); + WRITE64(opargs->clientid); + WRITE32(4); + WRITE32(opargs->id); + + return 0; +} + +static int +encode_locku(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_locku_opargs *opargs = arg->u.locku; + + RESERVE_SPACE(44); + WRITE32(OP_LOCKU); + WRITE32(arg->type); + WRITE32(opargs->seqid); + WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); + WRITE64(arg->offset); + WRITE64(arg->length); + + return 0; +} + static int encode_lookup(struct xdr_stream *xdr, struct nfs4_lookup *lookup) { @@ -1175,6 +1283,72 @@ out: return status; } +/* + * Encode a LOCK request + */ +static int +nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lock(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKT request + */ +static int +nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + 
.nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lockt(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKU request + */ +static int +nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_locku(&xdr, args); +out: + return status; +} + /* * Encode a READ request */ @@ -1997,6 +2171,66 @@ decode_link(struct xdr_stream *xdr, struct nfs4_link *link) return decode_change_info(xdr, link->ln_cinfo); } +/* + * We create the owner, so we know a proper owner.id length is 4. + */ +static int +decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied) +{ + uint32_t *p; + uint32_t namelen; + + READ_BUF(32); + READ64(denied->offset); + READ64(denied->length); + READ32(denied->type); + READ64(denied->owner.clientid); + READ32(namelen); + READ_BUF(namelen); + if (namelen == 4) + READ32(denied->owner.id); + return -NFS4ERR_DENIED; +} + +static int +decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_LOCK); + if (status == 0) { + READ_BUF(sizeof(nfs4_stateid)); + COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + } else if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, &res->u.denied); + return status; +} + +static int +decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + int status; + status = decode_op_hdr(xdr, OP_LOCKT); + if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, &res->u.denied); + return status; +} + +static int +decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + uint32_t *p; + int 
status; + + status = decode_op_hdr(xdr, OP_LOCKU); + if (status == 0) { + READ_BUF(sizeof(nfs4_stateid)); + COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + } + return status; +} + static int decode_lookup(struct xdr_stream *xdr) { @@ -2037,10 +2271,11 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) { uint32_t *p; + int status; - res->status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); - if (res->status) - return res->status; + status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); + if (status) + return status; READ_BUF(sizeof(res->stateid.data)); COPYMEM(res->stateid.data, sizeof(res->stateid.data)); return 0; @@ -2619,6 +2854,71 @@ out: return status; } +/* + * Decode LOCK response + */ +static int +nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lock(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKT response + */ +static int +nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lockt(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKU response + */ +static int +nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_locku(&xdr, res); +out: + return 
status; +} /* * Decode Read response @@ -2915,6 +3215,9 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(RENEW, enc_renew, dec_renew), PROC(SETCLIENTID, enc_setclientid, dec_setclientid), PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), + PROC(LOCK, enc_lock, dec_lock), + PROC(LOCKT, enc_lockt, dec_lockt), + PROC(LOCKU, enc_locku, dec_locku), }; struct rpc_version nfs_version4 = { diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 79f18e4cfb81..3b118742286f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #define NFSDBG_FACILITY NFSDBG_PROC @@ -653,6 +654,12 @@ nfs_request_compatible(struct nfs_page *req, struct file *filp, struct page *pag return 1; } +static int +nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ @@ -689,4 +696,5 @@ struct nfs_rpc_ops nfs_v2_clientops = { .file_release = nfs_release, .request_init = nfs_request_init, .request_compatible = nfs_request_compatible, + .lock = nfs_proc_lock, }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 4a61a4682718..35baf20a5b5c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -297,6 +297,9 @@ enum { NFSPROC4_CLNT_RENEW, NFSPROC4_CLNT_SETCLIENTID, NFSPROC4_CLNT_SETCLIENTID_CONFIRM, + NFSPROC4_CLNT_LOCK, + NFSPROC4_CLNT_LOCKT, + NFSPROC4_CLNT_LOCKU, }; #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 512d9203905f..524eb6d04d7b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -542,19 +542,43 @@ struct nfs4_state_owner { /* * struct nfs4_state maintains the client-side state for a given - * (state_owner,inode) tuple. + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). 
* + * OPEN: * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, * we need to know how many files are open for reading or writing on a * given inode. This information too is stored here. + * + * LOCK: a struct nfs4_lock_state holds the lock stateid and is linked to its parent nfs4_state (OPEN). */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + fl_owner_t ls_owner; /* POSIX lock owner */ + struct nfs4_state * ls_parent; /* Parent nfs4_state */ + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, +}; + struct nfs4_state { struct list_head open_states; /* List of states for the same state_owner */ struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ struct nfs4_state_owner *owner; /* Pointer to the open owner */ struct inode *inode; /* Pointer to the inode */ + unsigned long flags; /* Do we hold any locks?
*/ + struct semaphore lock_sema; /* Serializes file locking operations */ + rwlock_t state_lock; /* Protects the lock_states list */ + nfs4_stateid stateid; unsigned int nreaders; @@ -589,6 +613,8 @@ extern void init_nfsv4_state(struct nfs_server *); extern void destroy_nfsv4_state(struct nfs_server *); extern struct nfs4_client *nfs4_get_client(struct in_addr *); extern void nfs4_put_client(struct nfs4_client *clp); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); @@ -598,6 +624,15 @@ extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mod extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); extern int nfs4_handle_error(struct nfs_server *, int); extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); +extern struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t); +extern void nfs4_put_lock_state(struct nfs4_lock_state *state); +extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); +extern void nfs4_notify_setlk(struct inode *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_notify_unlck(struct inode *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); + + struct nfs4_mount_data; #else diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 42677b62e92b..c41a4e75555e 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -26,6 +26,7 @@ struct nfs_page { struct list_head wb_list, /* Defines state of page: */ *wb_list_head; /* read/write/commit */ struct file *wb_file; + fl_owner_t 
wb_lockowner; struct inode *wb_inode; struct rpc_cred *wb_cred; struct nfs4_state *wb_state; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 21827ad1a71e..a3ecfab78bc6 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -109,7 +109,6 @@ struct nfs_openargs { }; struct nfs_openres { - __u32 status; nfs4_stateid stateid; struct nfs_fh fh; struct nfs4_change_info * cinfo; @@ -129,7 +128,6 @@ struct nfs_open_confirmargs { }; struct nfs_open_confirmres { - __u32 status; nfs4_stateid stateid; }; @@ -157,10 +155,68 @@ struct nfs_closeargs { }; struct nfs_closeres { - __u32 status; nfs4_stateid stateid; }; +/* + * Arguments to the lock, lockt, and locku calls. + */ +struct nfs_lowner { + __u64 clientid; + u32 id; +}; + +struct nfs_open_to_lock { + __u32 open_seqid; + nfs4_stateid open_stateid; + __u32 lock_seqid; + struct nfs_lowner lock_owner; +}; + +struct nfs_exist_lock { + nfs4_stateid stateid; + __u32 seqid; +}; +struct nfs_lock_opargs { + __u32 reclaim; + __u32 new_lock_owner; + union { + struct nfs_open_to_lock *open_lock; + struct nfs_exist_lock *exist_lock; + } u; +}; + +struct nfs_locku_opargs { + __u32 seqid; + nfs4_stateid stateid; +}; + +struct nfs_lockargs { + struct nfs_fh * fh; + __u32 type; + __u64 offset; + __u64 length; + union { + struct nfs_lock_opargs *lock; /* LOCK */ + struct nfs_lowner *lockt; /* LOCKT */ + struct nfs_locku_opargs *locku; /* LOCKU */ + } u; +}; + +struct nfs_lock_denied { + __u64 offset; + __u64 length; + __u32 type; + struct nfs_lowner owner; +}; + +struct nfs_lockres { + union { + nfs4_stateid stateid;/* LOCK success, LOCKU */ + struct nfs_lock_denied denied; /* LOCK failed, LOCKT success */ + } u; + struct nfs_server * server; +}; /* * Arguments to the read call.
@@ -605,6 +661,7 @@ struct nfs_read_data { struct rpc_task task; struct inode *inode; struct rpc_cred *cred; + fl_owner_t lockowner; struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ struct page *pagevec[NFS_READ_MAXIOV]; @@ -620,6 +677,7 @@ struct nfs_write_data { struct rpc_task task; struct inode *inode; struct rpc_cred *cred; + fl_owner_t lockowner; struct nfs_fattr fattr; struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ @@ -686,6 +744,7 @@ struct nfs_rpc_ops { int (*file_release) (struct inode *, struct file *); void (*request_init)(struct nfs_page *, struct file *); int (*request_compatible)(struct nfs_page *, struct file *, struct page *); + int (*lock)(struct file *, int, struct file_lock *); }; /* -- cgit v1.2.3