7 files changed, 708 insertions, 204 deletions
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1c7b45be7358..2e8d2e1daeec 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -140,6 +140,10 @@ nfs_clear_inode(struct inode *inode)
 	cred = nfsi->cache_access.cred;
 	if (cred)
 		put_rpccred(cred);
+	/* Clean up the V4 state */
+	nfs4_put_shareowner(inode, nfsi->wo_owner);
+	nfs4_put_shareowner(inode, nfsi->ro_owner);
+	nfs4_put_shareowner(inode, nfsi->rw_owner);
 }
 
 void
@@ -1492,9 +1496,18 @@ static struct file_system_type nfs4_fs_type = {
 	.kill_sb	= nfs_kill_super,
 	.fs_flags	= FS_ODD_RENAME,
 };
+
+#define nfs4_zero_state(nfsi) \
+	do { \
+		(nfsi)->wo_owner = NULL; \
+		(nfsi)->ro_owner = NULL; \
+		(nfsi)->rw_owner = NULL; \
+	} while(0)
 #define register_nfs4fs() register_filesystem(&nfs4_fs_type)
 #define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type)
 #else
+#define nfs4_zero_state(nfsi) \
+	do { } while (0)
 #define register_nfs4fs() (0)
 #define unregister_nfs4fs()
 #endif
@@ -1516,6 +1529,7 @@ static struct inode *nfs_alloc_inode(struct super_block *sb)
 		return NULL;
 	nfsi->flags = 0;
 	nfsi->mm_cred = NULL;
+	nfs4_zero_state(nfsi);
 	return &nfsi->vfs_inode;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ccbca9dace46..15ce5d40f2c2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -347,48 +347,6 @@ nfs4_setup_putrootfh(struct nfs4_compound *cp)
 }
 
 static void
-nfs4_setup_open(struct nfs4_compound *cp, int flags, struct qstr *name,
-		struct iattr *sattr, char *stateid, struct nfs4_change_info *cinfo,
-		u32 *rflags)
-{
-	struct nfs4_open *open = GET_OP(cp, open);
-
-	BUG_ON(cp->flags);
-	
-	open->op_client_state = cp->server->nfs4_state;
-	open->op_share_access = flags & 3;
-	open->op_opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE;
-	open->op_createmode = NFS4_CREATE_UNCHECKED;
-	open->op_attrs = sattr;
-	if (flags & O_EXCL) {
-		u32 *p = (u32 *) open->op_verifier;
-		p[0] = jiffies;
-		p[1] = current->pid;
-		open->op_createmode = NFS4_CREATE_EXCLUSIVE;
-	}
-	open->op_name = name;
-	open->op_stateid = stateid;
-	open->op_cinfo = cinfo;
-	open->op_rflags = rflags;
-
-	OPNUM(cp) = OP_OPEN;
-	cp->req_nops++;
-	cp->renew_index = cp->req_nops;
-}
-
-static void
-nfs4_setup_open_confirm(struct nfs4_compound *cp, char *stateid)
-{
-	struct nfs4_open_confirm *open_confirm = GET_OP(cp, open_confirm);
-	
-	open_confirm->oc_stateid = stateid;
-
-	OPNUM(cp) = OP_OPEN_CONFIRM;
-	cp->req_nops++;
-	cp->renew_index = cp->req_nops;
-}
-
-static void
 nfs4_setup_readdir(struct nfs4_compound *cp, u64 cookie, u32 *verifier,
 		     struct page **pages, unsigned int bufsize, struct dentry *dentry)
 {
@@ -626,44 +584,114 @@ process_cinfo(struct nfs4_change_info *info, struct nfs_fattr *fattr)
 	}
 }
 
-static int
-do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr,
-	struct nfs_fattr *fattr, struct nfs_fh *fhandle, u32 *seqid, char *stateid)
-{
-	struct nfs4_compound	compound;
-	struct nfs4_op		ops[7];
-	struct nfs4_change_info	dir_cinfo;
-	struct nfs_fattr	dir_attr;
-	u32			dir_bmres[2];
-	u32			bmres[2];
-	u32			rflags;
-	int			status;
-
-	dir_attr.valid = 0;
-	fattr->valid = 0;
-	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "open");
-	nfs4_setup_putfh(&compound, NFS_FH(dir));
-	nfs4_setup_savefh(&compound);
-	nfs4_setup_open(&compound, flags, name, sattr, stateid, &dir_cinfo, &rflags);
-	nfs4_setup_getattr(&compound, fattr, bmres);
-	nfs4_setup_getfh(&compound, fhandle);
-	nfs4_setup_restorefh(&compound);
-	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
-	if ((status = nfs4_call_compound(&compound, NULL, 0)))
-		return status;
+int
+nfs4_do_open(struct inode *dir, struct qstr *name, int flags,
+		struct iattr *sattr, struct nfs_fattr *fattr,
+		struct nfs_fh *fhandle, struct nfs4_shareowner **spp)
+{
+	struct nfs4_shareowner  *sp;
+	struct nfs_server       *server = NFS_SERVER(dir);
+	struct nfs4_change_info d_cinfo;
+	int                     status;
+	u32                     f_bmres[2];
+	u32                     d_bmres[2];
+	struct nfs_fattr        d_attr = {
+		.valid          = 0,
+	};
+	struct nfs_fattr        f_attr = {
+		.valid          = 0,
+	};
+	struct nfs4_getattr     f_getattr = {
+		.gt_bmval       = nfs4_fattr_bitmap,
+		.gt_attrs       = (fattr == NULL ? &f_attr: fattr),
+		.gt_bmres       = f_bmres,
+	};
+	struct nfs4_getattr     d_getattr = {
+		.gt_bmval       = nfs4_fattr_bitmap,
+		.gt_attrs       = &d_attr,
+		.gt_bmres       = d_bmres,
+	};
+	struct nfs_openargs o_arg = {
+		.fh             = NFS_FH(dir),
+		.share_access   = flags & O_ACCMODE,
+		.clientid       = NFS_SERVER(dir)->nfs4_state->cl_clientid,
+		.opentype       = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE,
+		.createmode     = (flags & O_EXCL) ? NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED,
+		.name           = name,
+		.f_getattr      = &f_getattr,
+		.d_getattr      = &d_getattr,
+	};
+	struct nfs_openres o_res = {
+		.cinfo          = &d_cinfo,
+		.f_getattr      = &f_getattr,
+		.d_getattr      = &d_getattr,
+	};
+	struct rpc_message msg = {
+		.rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
+		.rpc_argp       = &o_arg,
+		.rpc_resp       = &o_res,
+	};
 
-	process_cinfo(&dir_cinfo, &dir_attr);
-	nfs_refresh_inode(dir, &dir_attr);
-	if (!(rflags & NFS4_OPEN_RESULT_CONFIRM)) {
-		*seqid = 1;
-		return 0;
+	status = -ENOMEM;
+	if (!(sp = nfs4_get_shareowner(dir))) {
+		dprintk("nfs4_do_open: nfs4_get_shareowner failed!\n");
+		goto out;
+	}
+	if (o_arg.createmode == NFS4_CREATE_EXCLUSIVE) {
+		u32 *p = (u32 *) o_arg.u.verifier;
+		p[0] = jiffies;
+		p[1] = current->pid;
+	} else if (o_arg.createmode == NFS4_CREATE_UNCHECKED) {
+		o_arg.u.attrs = sattr;
+	}
+	/* Hold so_sema across OPEN (and OPEN_CONFIRM) to serialize so_seqid */
+	down(&sp->so_sema);
+	o_arg.seqid = sp->so_seqid;
+	o_arg.id = sp->so_id;
+
+	status = rpc_call_sync(server->client, &msg, 0);
+	if (status) {
+		goto out_up;
+	}
+	nfs4_increment_seqid(status, sp);
+	process_cinfo(&d_cinfo, &d_attr);
+	nfs_refresh_inode(dir, &d_attr);
+
+	if (fhandle) {
+		memset(fhandle, 0, sizeof(*fhandle));
+		fhandle->size = (o_res.fh.size < NFS_MAXFHSIZE ? o_res.fh.size : NFS_MAXFHSIZE);
+		memcpy(fhandle->data, o_res.fh.data, fhandle->size);
 	}
-	*seqid = 2;
 
-	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "open_confirm");
-	nfs4_setup_putfh(&compound, fhandle);
-	nfs4_setup_open_confirm(&compound, stateid);
-	return nfs4_call_compound(&compound, NULL, 0);
+	if(o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) {
+		struct nfs_open_confirmargs oc_arg = {
+			.fh             = &o_res.fh,
+			.seqid          = sp->so_seqid,
+		};
+		struct nfs_open_confirmres oc_res = {
+			.status         = 0,
+		};
+		struct rpc_message oc_msg = {	/* distinct name: do not shadow the OPEN msg */
+			.rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
+			.rpc_argp       = &oc_arg,
+			.rpc_resp       = &oc_res,
+		};
+
+		memcpy(oc_arg.stateid, o_res.stateid, sizeof(nfs4_stateid));
+		status = rpc_call_sync(server->client, &oc_msg, 0);
+		if (status)
+			goto out_up;
+		nfs4_increment_seqid(status, sp);
+		memcpy(sp->so_stateid, oc_res.stateid, sizeof(nfs4_stateid));
+	} else
+		memcpy(sp->so_stateid, o_res.stateid, sizeof(nfs4_stateid));
+	sp->so_flags = flags & O_ACCMODE;
+
+out_up:
+	up(&sp->so_sema);
+out:
+	*spp = sp;	/* stored on every path (NULL if allocation failed) */
+	return status;
 }
 
 static int
@@ -799,19 +827,19 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	struct inode *		inode = dentry->d_inode;
 	int			size_change = sattr->ia_valid & ATTR_SIZE;
 	struct nfs_fh		throwaway_fh;
-	u32			seqid;
-	nfs4_stateid		stateid;
-	int			status;
+	struct nfs4_shareowner	*sp = NULL;
+	int			status, fake = 1;
 
 	fattr->valid = 0;
 	
 	if (size_change) {
-		status = do_open(dentry->d_parent->d_inode, &dentry->d_name,
-				 NFS4_SHARE_ACCESS_WRITE, NULL, fattr,
-				 &throwaway_fh, &seqid, stateid);
+		status = nfs4_do_open(dentry->d_parent->d_inode, 
+				&dentry->d_name,
+				O_WRONLY, NULL, fattr,
+				&throwaway_fh,&sp);
 		if (status)
 			return status;
-
+		fake = 0;
 		/*
 		 * Because OPEN is always done by name in nfsv4, it is
 		 * possible that we opened a different file by the same
@@ -823,16 +851,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		 */
 		if (fattr->fileid != NFS_FILEID(inode)) {
 			printk(KERN_WARNING "nfs: raced in setattr, returning -EIO\n");
-			do_close(NFS_SERVER(inode), NFS_FH(inode), seqid, stateid);
+			do_close(NFS_SERVER(inode), NFS_FH(inode), sp->so_seqid, sp->so_stateid);
 			return -EIO;
 		}
 	}
-	else
-		memcpy(stateid, zero_stateid, sizeof(nfs4_stateid));
 	
-	status = do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, stateid);
+	status = do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, 
+	                    fake == 1? zero_stateid: sp->so_stateid);
 	if (size_change)
-		do_close(NFS_SERVER(inode), NFS_FH(inode), seqid, stateid);
+		do_close(NFS_SERVER(inode), NFS_FH(inode), sp->so_seqid, sp->so_stateid);
 	return status;
 }
 
@@ -1025,21 +1052,38 @@ nfs4_proc_write(struct inode *inode, struct rpc_cred *cred,
 	return rpc_call_sync(server->client, &msg, rpcflags);
 }
 
+/*
+ * Got race?
+ * We will need to arrange for the VFS layer to provide an atomic open.
+ * Until then, this create/open method is prone to inefficiency and race
+ * conditions due to the lookup, create, and open VFS calls from sys_open()
+ * placed on the wire.
+ *
+ * Given the above sorry state of affairs, I'm simply sending an OPEN and a
+ * possible SETATTR; no CLOSE is sent from here.
+ * The file will be opened again in the subsequent VFS open call
+ * (nfs4_proc_file_open).
+ *
+ * The open for read will just hang around to be used by any process that
+ * opens the file O_RDONLY. This will all be resolved with the VFS changes.
+ */
+
 static int
 nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
-		 int flags, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+                 int flags, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
-	int			oflags;
-	u32			seqid;
-	nfs4_stateid		stateid;
-	int 			status;
+	int                     oflags;
+	struct nfs4_shareowner   *sp = NULL;
+	int                     status;
 
-	oflags = NFS4_SHARE_ACCESS_READ | O_CREAT | (flags & O_EXCL);
-	status = do_open(dir, name, oflags, sattr, fattr, fhandle, &seqid, stateid);
+	oflags = O_RDONLY | O_CREAT | (flags & O_EXCL);
+	status = nfs4_do_open(dir, name, oflags, sattr, fattr, fhandle, &sp);
 	if (!status) {
-		if (flags & O_EXCL)
-			status = do_setattr(NFS_SERVER(dir), fattr, fhandle, sattr, stateid);
-		do_close(NFS_SERVER(dir), fhandle, seqid, stateid);
+		if (flags & O_EXCL) {
+			status = do_setattr(NFS_SERVER(dir), fattr,
+			                     fhandle, sattr, sp->so_stateid);
+		/* XXX should i bother closing the file? */
+		}
 	}
 	return status;
 }
@@ -1561,15 +1605,54 @@ nfs4_proc_renew(struct nfs_server *server)
 }
 
 /*
-* To be changed into a real NFSv4 file_open soon. 
-*/
-
+ * We will need to arrange for the VFS layer to provide an atomic open.
+ * Until then, this open method is prone to inefficiency and race conditions
+ * due to the lookup, potential create, and open VFS calls from sys_open()
+ * placed on the wire.
+ */
 int
 nfs4_proc_file_open(struct inode *inode, struct file *filp)
 {
-	return 0;
+	struct dentry *dentry = filp->f_dentry;
+	struct inode *dir = dentry->d_parent->d_inode;
+	int flags, status = 0;
+
+	dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
+	                       (int)dentry->d_parent->d_name.len,
+	                       dentry->d_parent->d_name.name,
+	                       (int)dentry->d_name.len, dentry->d_name.name);
+
+	lock_kernel();
+
+	/* isn't this done in open_namei? */
+	if (!S_ISREG(inode->i_mode)) {
+		status = -EISDIR;
+		goto out;
+	}
+
+	flags = filp->f_flags & O_ACCMODE;
+
+/*
+* Got race??
+* We have already opened the file "O_EXCL" in nfs4_proc_create!!
+* This ugliness will go away with lookup-intent...
+*/
+	while (!nfs4_get_inode_share(inode, flags)) {
+		struct nfs4_shareowner *sp = NULL;
+		status = nfs4_do_open(dir, &dentry->d_name, flags, NULL, NULL, NULL, &sp);
+		if (status) {
+			nfs4_put_shareowner(inode,sp);
+			break;
+		}
+		if (nfs4_set_inode_share(inode, sp, flags))
+			nfs4_put_shareowner(inode,sp);
+	}
+out:
+	unlock_kernel();
+	return status;
 }
 
+
 struct nfs_rpc_ops	nfs_v4_clientops = {
 	.version	= 4,			/* protocol version */
 	.getroot	= nfs4_proc_get_root,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ecbc54fb1048..059880274526 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -42,6 +42,16 @@
 #include <linux/slab.h>
 #include <linux/nfs_fs.h>
 
+/* This protects most of the client-side state. */
+static spinlock_t               state_spinlock = SPIN_LOCK_UNLOCKED;
+
+nfs4_stateid zero_stateid =
+	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+nfs4_stateid one_stateid =
+	{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+
 /*
  * nfs4_get_client(): returns an empty client structure
  * nfs4_put_client(): drops reference to client structure
@@ -52,26 +62,164 @@
 struct nfs4_client *
 nfs4_get_client(void)
 {
-        struct nfs4_client *clp;
+	struct nfs4_client *clp;
 
-        if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL))) {
-                atomic_set(&clp->cl_count, 1);
-                clp->cl_clientid = 0;
-                INIT_LIST_HEAD(&clp->cl_lockowners);
-        }
-        return clp;
+	if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)))
+		memset(clp, 0, sizeof(*clp));	/* zero every field, incl. cl_lockowner_id */
+	return clp;
 }
 
 void
 nfs4_put_client(struct nfs4_client *clp)
 {
-        BUG_ON(!clp);
-        BUG_ON(!atomic_read(&clp->cl_count));
-        
-        if (atomic_dec_and_test(&clp->cl_count)) {
-                BUG_ON(!list_empty(&clp->cl_lockowners));
-                kfree(clp);
+	BUG_ON(!clp);
+	kfree(clp);
+}
+
+static inline u32
+nfs4_alloc_lockowner_id(struct nfs4_client *clp)
+{
+	u32 res;
+
+	spin_lock(&state_spinlock);
+	res = clp->cl_lockowner_id ++;
+	spin_unlock(&state_spinlock);
+	return res;
+}
+
+/*
+ * nfs4_get_shareowner(): this is called on the OPEN or CREATE path to
+ * obtain a new shareowner.
+ *
+ * There are three shareowners (open_owner4 in rfc3010) per inode,
+ * one for each possible combination of share lock access. Since
+ * Linux does not support the deny access type, there are
+ * three (not 9) referenced by the nfs_inode:
+ *
+ * O_WRONLY: inode->wo_owner
+ * O_RDONLY: inode->ro_owner
+ * O_RDWR:   inode->rw_owner
+ *
+ * We create a new shareowner the first time a file is OPENed with
+ * one of the above shares. All other OPENs with a similar
+ * share use the single stateid associated with the inode.
+ *
+ */
+struct nfs4_shareowner *
+nfs4_get_shareowner(struct inode *dir)
+{
+	struct nfs4_client *clp;
+	struct nfs4_shareowner *sp;
+
+	sp = kmalloc(sizeof(*sp),GFP_KERNEL);
+	if (!sp)
+		return NULL;
+	clp = (NFS_SB(dir->i_sb))->nfs4_state;
+	BUG_ON(!clp);
+	init_MUTEX(&sp->so_sema);
+	sp->so_seqid = 0;                 /* arbitrary */
+	memset(sp->so_stateid, 0, sizeof(nfs4_stateid));
+	sp->so_id = nfs4_alloc_lockowner_id(clp);
+	return sp;
+}
+
+/*
+ * Called for each inode shareowner in nfs_clear_inode, 
+ * or if nfs4_do_open fails.
+ */
+void
+nfs4_put_shareowner(struct inode *inode, struct nfs4_shareowner *sp)
+{
+	if (!sp)
+		return;
+        kfree(sp);
+}
+
+/*
+* Called with sp->so_sema held.
+*
+* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
+* failed with a seqid incrementing error -
+* see the comments above seqid_mutating_err() in nfs_fs.h
+*/
+void
+nfs4_increment_seqid(u32 status, struct nfs4_shareowner *sp)
+{
+	if (status == NFS_OK || seqid_mutating_err(status))
+		sp->so_seqid++;
+}
+
+/*
+* Called by nfs4_proc_file_open to attach the shareowner to the inode
+*/
+int
+nfs4_set_inode_share(struct inode * inode, struct nfs4_shareowner *sp, unsigned int open_flags)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	switch (open_flags & O_ACCMODE) {
+		case O_RDONLY:
+			if (!nfsi->ro_owner) {
+				nfsi->ro_owner = sp;
+				return 0;
+			}
+			break;
+		case O_WRONLY:
+			if (!nfsi->wo_owner) {
+				nfsi->wo_owner = sp;
+				return 0;
+			}
+			break;
+		case O_RDWR:
+			if (!nfsi->rw_owner) {
+				nfsi->rw_owner = sp;
+				return 0;
+			}
+	}
+	return -EBUSY;
+}
+
+/*
+* Boolean test to determine if an OPEN call goes on the wire.
+*
+* Called by nfs4_proc_open.
+*/
+int
+nfs4_test_shareowner(struct inode *inode, unsigned int open_flags)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	switch (open_flags & O_ACCMODE) {
+		case O_RDONLY:
+			if(nfsi->ro_owner)
+				return 0;
+			break;
+		case O_WRONLY:
+			if(nfsi->wo_owner)
+				return 0;
+			break;
+		case O_RDWR:
+			if(nfsi->rw_owner)
+				return 0;
         }
+        return 1;
+}
+
+struct nfs4_shareowner *
+nfs4_get_inode_share(struct inode * inode, unsigned int open_flags)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	switch (open_flags & O_ACCMODE) {
+		case O_RDONLY:
+			return nfsi->ro_owner;
+		case O_WRONLY:
+			return nfsi->wo_owner;
+		case O_RDWR:
+			return nfsi->rw_owner;
+	}
+	/* Duh gcc warning if we don't... */
+	return NULL;
 }
 
 /*
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index df753fc10ff4..319d16b0bd01 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -72,6 +72,17 @@ extern int			nfs_stat_to_errno(int);
 #define encode_putfh_maxsz	op_encode_hdr_maxsz + 1 + \
 				(NFS4_FHSIZE >> 2)
 #define decode_putfh_maxsz	op_decode_hdr_maxsz
+#define encode_getfh_maxsz      op_encode_hdr_maxsz
+#define decode_getfh_maxsz      op_decode_hdr_maxsz + 1 + \
+                                (NFS4_FHSIZE >> 2)
+#define encode_getattr_maxsz    op_encode_hdr_maxsz + 3
+#define nfs4_fattr_bitmap_maxsz 26 + 2 * ((NFS4_MAXNAMLEN +1) >> 2)
+#define decode_getattr_maxsz    op_decode_hdr_maxsz + 3 + \
+                                nfs4_fattr_bitmap_maxsz
+#define encode_savefh_maxsz     op_encode_hdr_maxsz
+#define decode_savefh_maxsz     op_decode_hdr_maxsz
+#define encode_restorefh_maxsz  op_encode_hdr_maxsz
+#define decode_restorefh_maxsz  op_decode_hdr_maxsz
 #define encode_read_getattr_maxsz	op_encode_hdr_maxsz + 2
 #define decode_read_getattr_maxsz	op_decode_hdr_maxsz + 8
 #define encode_pre_write_getattr_maxsz	op_encode_hdr_maxsz + 2
@@ -109,6 +120,31 @@ extern int			nfs_stat_to_errno(int);
 				decode_pre_write_getattr_maxsz + \
 				op_decode_hdr_maxsz + 2 + \
 				decode_post_write_getattr_maxsz
+#define NFS4_enc_open_sz        compound_encode_hdr_maxsz + \
+                                encode_putfh_maxsz + \
+                                encode_savefh_maxsz + \
+                                op_encode_hdr_maxsz + \
+                                13 + 3 + 2 + 64 + \
+                                encode_getattr_maxsz + \
+                                encode_getfh_maxsz + \
+                                encode_restorefh_maxsz + \
+                                encode_getattr_maxsz
+#define NFS4_dec_open_sz        compound_decode_hdr_maxsz + \
+                                decode_putfh_maxsz + \
+                                decode_savefh_maxsz + \
+                                op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \
+                                decode_getattr_maxsz + \
+                                decode_getfh_maxsz + \
+                                decode_restorefh_maxsz + \
+                                decode_getattr_maxsz
+#define NFS4_enc_open_confirm_sz      \
+                                compound_encode_hdr_maxsz + \
+                                encode_putfh_maxsz + \
+                                op_encode_hdr_maxsz + 5
+#define NFS4_dec_open_confirm_sz        compound_decode_hdr_maxsz + \
+                                        decode_putfh_maxsz + \
+                                        op_decode_hdr_maxsz + 4
+
 
 
 static struct {
@@ -503,69 +539,76 @@ encode_lookup(struct xdr_stream *xdr, struct nfs4_lookup *lookup)
 }
 
 static int
-encode_open(struct xdr_stream *xdr, struct nfs4_open *open)
+encode_open(struct xdr_stream *xdr, struct nfs_openargs *arg)
 {
-	static int global_id = 0;
-	int id = global_id++;
 	int status;
 	uint32_t *p;
-	
-	/* seqid, share_access, share_deny, clientid, ownerlen, owner, opentype */
+
+ /*
+ * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
+ * owner 4, opentype 4 = 36
+ */
 	RESERVE_SPACE(36);
 	WRITE32(OP_OPEN);
-	WRITE32(0);                       /* seqid */
-	WRITE32(open->op_share_access);
-	WRITE32(0);                       /* for us, share_deny== 0 always */
-	WRITE64(open->op_client_state->cl_clientid);
+	WRITE32(arg->seqid);
+	switch (arg->share_access) {
+		case O_RDONLY:
+			WRITE32(NFS4_SHARE_ACCESS_READ);
+			break;
+		case O_WRONLY:
+			WRITE32(NFS4_SHARE_ACCESS_WRITE);
+			break;
+		case O_RDWR:
+			WRITE32(NFS4_SHARE_ACCESS_BOTH);
+	}
+	WRITE32(0);                  /* for linux, share_deny = 0 always */
+	WRITE64(arg->clientid);
 	WRITE32(4);
-	WRITE32(id);
-	WRITE32(open->op_opentype);
-	
-	if (open->op_opentype == NFS4_OPEN_CREATE) {
-		if (open->op_createmode == NFS4_CREATE_EXCLUSIVE) {
-			RESERVE_SPACE(4+sizeof(nfs4_verifier));
-			WRITE32(open->op_createmode);
-			WRITEMEM(open->op_verifier, sizeof(nfs4_verifier));
+	WRITE32(arg->id);
+	WRITE32(arg->opentype);
+
+	if (arg->opentype == NFS4_OPEN_CREATE) {
+		if (arg->createmode == NFS4_CREATE_EXCLUSIVE) {
+			RESERVE_SPACE(12);
+			WRITE32(arg->createmode);
+			WRITEMEM(arg->u.verifier, sizeof(nfs4_verifier));
 		}
-		else if (open->op_attrs) {
+		else if (arg->u.attrs) {
 			RESERVE_SPACE(4);
-			WRITE32(open->op_createmode);
-			if ((status = encode_attrs(xdr, open->op_attrs)))
+			WRITE32(arg->createmode);
+			if ((status = encode_attrs(xdr, arg->u.attrs)))
 				return status;
 		}
 		else {
 			RESERVE_SPACE(12);
-			WRITE32(open->op_createmode);
+			WRITE32(arg->createmode);
 			WRITE32(0);
 			WRITE32(0);
 		}
 	}
 
-	RESERVE_SPACE(8 + open->op_name->len);
+	RESERVE_SPACE(8 + arg->name->len);
 	WRITE32(NFS4_OPEN_CLAIM_NULL);
-	WRITE32(open->op_name->len);
-	WRITEMEM(open->op_name->name, open->op_name->len);
-	
+	WRITE32(arg->name->len);
+	WRITEMEM(arg->name->name, arg->name->len);
+
 	return 0;
 }
 
 static int
-encode_open_confirm(struct xdr_stream *xdr, struct nfs4_open_confirm *open_confirm)
+encode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmargs *arg)
 {
 	uint32_t *p;
 
-	/*
-	 * Note: In this "stateless" implementation, the OPEN_CONFIRM
-	 * seqid is always equal to 1.
-	 */
 	RESERVE_SPACE(8+sizeof(nfs4_stateid));
 	WRITE32(OP_OPEN_CONFIRM);
-	WRITEMEM(open_confirm->oc_stateid, sizeof(nfs4_stateid));
-	WRITE32(1);
-	
+	WRITEMEM(arg->stateid, sizeof(nfs4_stateid));
+	WRITE32(arg->seqid);
+
 	return 0;
 }
 
+
 static int
 encode_putfh(struct xdr_stream *xdr, struct nfs_fh *fh)
 {
@@ -833,12 +876,6 @@ encode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqs
 		case OP_LOOKUP:
 			status = encode_lookup(xdr, &cp->ops[i].u.lookup);
 			break;
-		case OP_OPEN:
-			status = encode_open(xdr, &cp->ops[i].u.open);
-			break;
-		case OP_OPEN_CONFIRM:
-			status = encode_open_confirm(xdr, &cp->ops[i].u.open_confirm);
-			break;
 		case OP_PUTFH:
 			status = encode_putfh(xdr, cp->ops[i].u.putfh.pf_fhandle);
 			break;
@@ -903,6 +940,65 @@ nfs4_xdr_enc_compound(struct rpc_rqst *req, uint32_t *p, struct nfs4_compound *c
 	cp->timestamp = jiffies;
 	return status;
 }
+/*
+ * Encode an OPEN request
+ */
+static int
+nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops   = 7,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_savefh(&xdr);
+	if (status)
+		goto out;
+	status = encode_open(&xdr,args);
+	if (status)
+		goto out;
+	status = encode_getattr(&xdr, args->f_getattr);
+	if (status)
+		goto out;
+	status = encode_getfh(&xdr);
+	if (status)
+		goto out;
+	status = encode_restorefh(&xdr);
+	if (status)
+		goto out;
+	status = encode_getattr(&xdr, args->d_getattr);
+out:
+	return status;
+}
+
+/*
+ * Encode an OPEN_CONFIRM request
+ */
+static int
+nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops   = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if(status)
+		goto out;
+	status = encode_open_confirm(&xdr, args);
+out:
+	return status;
+}
+
 
 /*
  * Encode a READ request
@@ -968,7 +1064,7 @@ out:
 }
 
 /*
- * Encode a COMMIT request
+ * Encode a COMMIT request
  */
 static int
 nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
@@ -1611,49 +1707,49 @@ decode_lookup(struct xdr_stream *xdr)
 }
 
 static int
-decode_open(struct xdr_stream *xdr, struct nfs4_open *open)
+decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
 {
-	uint32_t *p;
-	uint32_t bmlen, delegation_type;
-	int status;
-	
-	status = decode_op_hdr(xdr, OP_OPEN);
-	if (status)
-		return status;
-	READ_BUF(sizeof(nfs4_stateid));
-	COPYMEM(open->op_stateid, sizeof(nfs4_stateid));
+        uint32_t *p;
+        uint32_t bmlen, delegation_type;
+        int status;
 
-	decode_change_info(xdr, open->op_cinfo);
+        status = decode_op_hdr(xdr, OP_OPEN);
+        if (status)
+                return status;
+        READ_BUF(sizeof(nfs4_stateid));
+        COPYMEM(res->stateid, sizeof(nfs4_stateid));
 
-	READ_BUF(8);
-	READ32(*open->op_rflags);
-	READ32(bmlen);
-	if (bmlen > 10)
-		goto xdr_error;
-		
-	READ_BUF((bmlen << 2) + 4);
-	p += bmlen;
-	READ32(delegation_type);
-	if (delegation_type != NFS4_OPEN_DELEGATE_NONE)
-		goto xdr_error;
-	
-	DECODE_TAIL;
+        decode_change_info(xdr, res->cinfo);
+
+        READ_BUF(8);
+        READ32(res->rflags);
+        READ32(bmlen);
+        if (bmlen > 10)
+                goto xdr_error;
+
+        READ_BUF((bmlen << 2) + 4);
+        p += bmlen;
+        READ32(delegation_type);
+        if (delegation_type != NFS4_OPEN_DELEGATE_NONE)
+                goto xdr_error;
+
+        DECODE_TAIL;
 }
 
 static int
-decode_open_confirm(struct xdr_stream *xdr, struct nfs4_open_confirm *open_confirm)
+decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
 {
-	uint32_t *p;
-	int status;
+        uint32_t *p;
 
-	status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
-	if (status)
-		return status;
-	READ_BUF(sizeof(nfs4_stateid));
-	COPYMEM(open_confirm->oc_stateid, sizeof(nfs4_stateid));
-	return 0;
+        res->status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
+        if (res->status)
+                return res->status;
+        READ_BUF(sizeof(nfs4_stateid));
+        COPYMEM(res->stateid, sizeof(nfs4_stateid));
+        return 0;
 }
 
+
 static int
 decode_putfh(struct xdr_stream *xdr)
 {
@@ -1998,12 +2094,6 @@ decode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqs
 		case OP_LOOKUP:
 			status = decode_lookup(xdr);
 			break;
-		case OP_OPEN:
-			status = decode_open(xdr, &op->u.open);
-			break;
-		case OP_OPEN_CONFIRM:
-			status = decode_open_confirm(xdr, &op->u.open_confirm);
-			break;
 		case OP_PUTFH:
 			status = decode_putfh(xdr);
 			break;
@@ -2075,6 +2165,72 @@ out:
 	return status;
 }
 
+
+/*
+ * Decode OPEN response
+ */
+static int
+nfs4_xdr_dec_open(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr;
+	struct nfs4_getfh gfh	= {
+		.gf_fhandle = &res->fh,
+	};
+        int status;
+
+        xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+        status = decode_compound_hdr(&xdr, &hdr);
+        if (status)
+                goto out;
+        status = decode_putfh(&xdr);
+        if (status)
+                goto out;
+        status = decode_savefh(&xdr);
+        if (status)
+                goto out;
+        status = decode_open(&xdr, res);
+        if (status)
+                goto out;
+        status = decode_getattr(&xdr, res->f_getattr);
+        if (status)
+                goto out;
+        status = decode_getfh(&xdr, &gfh);
+        if (status)
+                goto out;
+        status = decode_restorefh(&xdr);
+        if (status)
+                goto out;
+        status = decode_getattr(&xdr, res->d_getattr);
+        if (status)
+                goto out;
+out:
+        return status;
+}
+
+/*
+ * Decode OPEN_CONFIRM response
+ */
+static int
+nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_open_confirmres *res)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr;
+        int status;
+
+        xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+        status = decode_compound_hdr(&xdr, &hdr);
+        if (status)
+                goto out;
+        status = decode_putfh(&xdr);
+        if (status)
+                goto out;
+        status = decode_open_confirm(&xdr, res);
+out:
+        return status;
+}
+
+
 /*
  * Decode Read response
  */
@@ -2216,6 +2372,8 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(READ,		enc_read,	dec_read),
   PROC(WRITE,		enc_write,	dec_write),
   PROC(COMMIT,		enc_commit,	dec_commit),
+  PROC(OPEN,		enc_open,	dec_open),
+  PROC(OPEN_CONFIRM,	enc_open_confirm,	dec_open_confirm),
 };
 
 struct rpc_version		nfs_version4 = {
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 7c0d596db02f..02c77391d6f8 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -206,6 +206,8 @@ enum {
 	NFSPROC4_CLNT_READ,
 	NFSPROC4_CLNT_WRITE,
 	NFSPROC4_CLNT_COMMIT,
+	NFSPROC4_CLNT_OPEN,
+	NFSPROC4_CLNT_OPEN_CONFIRM,
 };
 
 #endif
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index bc44563921f7..7d53fd8a8710 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -155,6 +155,13 @@ struct nfs_inode {
 
 	wait_queue_head_t	nfs_i_wait;
 
+#ifdef CONFIG_NFS_V4
+        /* NFSv4 state */
+	struct nfs4_shareowner   *ro_owner;
+	struct nfs4_shareowner   *wo_owner;
+	struct nfs4_shareowner   *rw_owner;
+#endif /* CONFIG_NFS_V4*/
+
 	struct inode		vfs_inode;
 };
 
@@ -435,28 +442,74 @@ extern void * nfs_root_data(void);
 #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 
 #ifdef CONFIG_NFS_V4
+
+/*
+ * In a seqid-mutating op, this macro controls which error return
+ * values trigger incrementation of the seqid.
+ *
+ * from rfc 3010:
+ * The client MUST monotonically increment the sequence number for the
+ * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE
+ * operations.  This is true even in the event that the previous
+ * operation that used the sequence number received an error.  The only
+ * exception to this rule is if the previous operation received one of
+ * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID,
+ * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR,
+ * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE.
+ *
+ */
+#define seqid_mutating_err(err)       \
+(((err) != NFSERR_STALE_CLIENTID) &&  \
+ ((err) != NFSERR_STALE_STATEID)  &&  \
+ ((err) != NFSERR_BAD_STATEID)    &&  \
+ ((err) != NFSERR_BAD_SEQID)      &&  \
+ ((err) != NFSERR_BAD_XDR)        &&  \
+ ((err) != NFSERR_RESOURCE)       &&  \
+ ((err) != NFSERR_NOFILEHANDLE))
+
 struct nfs4_client {
-        atomic_t                cl_count;       /* refcount */
         u64                     cl_clientid;    /* constant */
-	 nfs4_verifier           cl_confirm;     
+	nfs4_verifier           cl_confirm;     
 
-        /*
-         * Starts a list of lockowners, linked through lo_list.
-	 */
-        struct list_head        cl_lockowners;  /* protected by state_spinlock */
+	u32			cl_lockowner_id;
+};
+
+/*
+* The ->so_sema is held during all shareowner seqid-mutating operations:
+* OPEN, OPEN_DOWNGRADE, and CLOSE.
+* Its purpose is to properly serialize so_seqid, as mandated by
+* the protocol.
+*/
+struct nfs4_shareowner {
+	u32                  so_id;      /* 32-bit identifier, unique */
+	struct semaphore     so_sema;
+	u32                  so_seqid;   /* protected by so_sema */
+	nfs4_stateid         so_stateid; /* protected by so_sema */
+	unsigned int         so_flags;   /* protected by so_sema */
 };
 
+
 /* nfs4proc.c */
 extern int nfs4_proc_renew(struct nfs_server *server);
 
 /* nfs4renewd.c */
 extern int nfs4_init_renewd(struct nfs_server *server);
-#endif /* CONFIG_NFS_V4 */
-
-#ifdef CONFIG_NFS_V4
 
+/* nfs4state.c */
 extern struct nfs4_client *nfs4_get_client(void);
 extern void nfs4_put_client(struct nfs4_client *clp);
+extern struct nfs4_shareowner * nfs4_get_shareowner(struct inode *inode);
+void nfs4_put_shareowner(struct inode *inode, struct nfs4_shareowner *sp);
+extern int nfs4_set_inode_share(struct inode * inode,
+                     struct nfs4_shareowner *sp, unsigned int flags);
+extern void nfs4_increment_seqid(u32 status, struct nfs4_shareowner *sp);
+extern int nfs4_test_shareowner(struct inode *inode, unsigned int open_flags);
+struct nfs4_shareowner * nfs4_get_inode_share(struct inode * inode, unsigned int open_flags);
+
+
+
+
+
 
 struct nfs4_mount_data;
 static inline int
@@ -481,6 +534,7 @@ destroy_nfsv4_state(struct nfs_server *server)
 #else
 #define create_nfsv4_state(server, data)  0
 #define destroy_nfsv4_state(server)       do { } while (0)
+#define nfs4_put_shareowner(inode, owner) do { } while (0)
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 980705c7307e..2d1544931139 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -88,6 +88,51 @@ struct nfs_pathconf {
 };
 
 /*
+ * Arguments to the open call.
+ */
+struct nfs_openargs {
+	struct nfs_fh *         fh;
+	__u32                   seqid;
+	__u32                   share_access;
+	__u64                   clientid;
+	__u32                   id;
+	__u32                   opentype;
+	__u32                   createmode;
+	union {
+		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
+		nfs4_verifier   verifier; /* EXCLUSIVE */
+	} u;
+	struct qstr *           name;
+	struct nfs4_getattr *   f_getattr;
+	struct nfs4_getattr *   d_getattr;
+};
+
+struct nfs_openres {
+	__u32                   status;
+	nfs4_stateid            stateid;
+	struct nfs_fh           fh;
+	struct nfs4_change_info * cinfo;
+	__u32                   rflags;
+	struct nfs4_getattr *   f_getattr;
+	struct nfs4_getattr *   d_getattr;
+};
+
+/*
+ * Arguments to the open_confirm call.
+ */
+struct nfs_open_confirmargs {
+	struct nfs_fh *         fh;
+	nfs4_stateid            stateid;
+	__u32                   seqid;
+};
+
+struct nfs_open_confirmres {
+	__u32                   status;
+	nfs4_stateid            stateid;
+};
+
+
+/*
  * Arguments to the read call.
  */