[PATCH] add file_operations.fcntl

From: Chuck Lever <cel@citi.umich.edu> O_DIRECT|O_APPEND cannot possibly work on NFS, so NFS needs some way of preventing the user from setting this combination. We felt that the best way of implementing this restriction is to allow the filesystem to implement its own fcntl() handler. This patch does that, and provides the appropriate handler for NFS. Additional details from Chuck: Forgetting O_DIRECT for a moment, O_APPEND writes on NFS don't work in any case when multiple clients are writing to a file, since an NFS client can never guarantee it knows where the true end of file is 100% of the time. It works as expected iff only one client writes to an O_APPEND file at a time. Multi-client O_APPEND writing doesn't seem to be a problem for any application I'm aware of. Since it can be made to behave in the multi-client case with careful application logic or by using file locking, I don't think we should disallow it. I want to drop the inode semaphore when doing NFS direct I/O because it is synchronous; holding the i_sem means we reduce direct I/O concurrency to one I/O per file at a time. The important thing sct was worried about was the case where a single client is writing with O_APPEND and O_DIRECT, and we don't hold the i_sem during the write. We must at least hold the i_sem when determining where the end of file is to do the O_APPEND write. In 2.6, I believe that is handled correctly in the VFS layer, so this is not an issue for 2.6, right?
author: Andrew Morton <akpm@osdl.org> 2004-04-11 22:59:45 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2004-04-11 22:59:45 -0700
commit: cea39746deca7ce8b10f21e4a4b3e96c33381e2e (patch)
tree: 673297c70a7c5bc0d22d035d85bb40df16b24621
parent: 3f66b056e1b56427eec0b26e0a20ac08fb8a6dc9 (diff)
3 files changed, 105 insertions, 64 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3486b799e9e4..abad0aa00d13 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -282,80 +282,88 @@ void f_delown(struct file *filp)
 
 EXPORT_SYMBOL(f_delown);
 
-static long do_fcntl(unsigned int fd, unsigned int cmd,
-		     unsigned long arg, struct file * filp)
+long generic_file_fcntl(int fd, unsigned int cmd,
+			unsigned long arg, struct file *filp)
 {
 	long err = -EINVAL;
 
 	switch (cmd) {
-		case F_DUPFD:
-			get_file(filp);
-			err = dupfd(filp, arg);
-			break;
-		case F_GETFD:
-			err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
-			break;
-		case F_SETFD:
-			err = 0;
-			set_close_on_exec(fd, arg & FD_CLOEXEC);
-			break;
-		case F_GETFL:
-			err = filp->f_flags;
-			break;
-		case F_SETFL:
-			err = setfl(fd, filp, arg);
-			break;
-		case F_GETLK:
-			err = fcntl_getlk(filp, (struct flock __user *) arg);
-			break;
-		case F_SETLK:
-		case F_SETLKW:
-			err = fcntl_setlk(filp, cmd, (struct flock __user *) arg);
-			break;
-		case F_GETOWN:
-			/*
-			 * XXX If f_owner is a process group, the
-			 * negative return value will get converted
-			 * into an error.  Oops.  If we keep the
-			 * current syscall conventions, the only way
-			 * to fix this will be in libc.
-			 */
-			err = filp->f_owner.pid;
-			force_successful_syscall_return();
-			break;
-		case F_SETOWN:
-			err = f_setown(filp, arg, 1);
-			break;
-		case F_GETSIG:
-			err = filp->f_owner.signum;
-			break;
-		case F_SETSIG:
-			/* arg == 0 restores default behaviour. */
-			if (arg < 0 || arg > _NSIG) {
-				break;
-			}
-			err = 0;
-			filp->f_owner.signum = arg;
-			break;
-		case F_GETLEASE:
-			err = fcntl_getlease(filp);
-			break;
-		case F_SETLEASE:
-			err = fcntl_setlease(fd, filp, arg);
-			break;
-		case F_NOTIFY:
-			err = fcntl_dirnotify(fd, filp, arg);
-			break;
-		default:
+	case F_DUPFD:
+		get_file(filp);
+		err = dupfd(filp, arg);
+		break;
+	case F_GETFD:
+		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
+		break;
+	case F_SETFD:
+		err = 0;
+		set_close_on_exec(fd, arg & FD_CLOEXEC);
+		break;
+	case F_GETFL:
+		err = filp->f_flags;
+		break;
+	case F_SETFL:
+		err = setfl(fd, filp, arg);
+		break;
+	case F_GETLK:
+		err = fcntl_getlk(filp, (struct flock __user *) arg);
+		break;
+	case F_SETLK:
+	case F_SETLKW:
+		err = fcntl_setlk(filp, cmd, (struct flock __user *) arg);
+		break;
+	case F_GETOWN:
+		/*
+		 * XXX If f_owner is a process group, the
+		 * negative return value will get converted
+		 * into an error.  Oops.  If we keep the
+		 * current syscall conventions, the only way
+		 * to fix this will be in libc.
+		 */
+		err = filp->f_owner.pid;
+		force_successful_syscall_return();
+		break;
+	case F_SETOWN:
+		err = f_setown(filp, arg, 1);
+		break;
+	case F_GETSIG:
+		err = filp->f_owner.signum;
+		break;
+	case F_SETSIG:
+		/* arg == 0 restores default behaviour. */
+		if (arg < 0 || arg > _NSIG) {
 			break;
+		}
+		err = 0;
+		filp->f_owner.signum = arg;
+		break;
+	case F_GETLEASE:
+		err = fcntl_getlease(filp);
+		break;
+	case F_SETLEASE:
+		err = fcntl_setlease(fd, filp, arg);
+		break;
+	case F_NOTIFY:
+		err = fcntl_dirnotify(fd, filp, arg);
+		break;
+	default:
+		break;
 	}
-
 	return err;
 }
+EXPORT_SYMBOL(generic_file_fcntl);
 
-asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
+static long do_fcntl(int fd, unsigned int cmd,
+			unsigned long arg, struct file *filp)
+{
+	if (filp->f_op && filp->f_op->fcntl)
+		return filp->f_op->fcntl(fd, cmd, arg, filp);
+	return generic_file_fcntl(fd, cmd, arg, filp);
+}
+
+asmlinkage long sys_fcntl(int fd, unsigned int cmd, unsigned long arg)
 {	
-	struct file * filp;
+	struct file *filp;
 	long err = -EBADF;
 
 	filp = fget(fd);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e1203ef2275e..df23d4de5b89 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -33,6 +33,8 @@
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
+static long nfs_file_fcntl(int fd, unsigned int cmd,
+			unsigned long arg, struct file *filp);
 static int nfs_file_open(struct inode *, struct file *);
 static int nfs_file_release(struct inode *, struct file *);
 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
@@ -55,6 +57,7 @@ struct file_operations nfs_file_operations = {
 	.fsync		= nfs_fsync,
 	.lock		= nfs_lock,
 	.sendfile	= nfs_file_sendfile,
+	.fcntl		= nfs_file_fcntl,
 };
 
 struct inode_operations nfs_file_inode_operations = {
@@ -68,6 +71,28 @@ struct inode_operations nfs_file_inode_operations = {
 # define IS_SWAPFILE(inode)	(0)
 #endif
 
+#define nfs_invalid_flags	(O_APPEND | O_DIRECT)
+
+/*
+ * Reject F_SETFL requests whose new flags (arg) combine O_APPEND and
+ * O_DIRECT, which NFS cannot support; all else goes to the generic handler.
+ */
+static long
+nfs_file_fcntl(int fd, unsigned int cmd,
+		unsigned long arg, struct file *filp)
+{
+	switch (cmd) {
+	case F_SETFL:
+		/* Test the requested flags, not the ones already set. */
+		if ((arg & nfs_invalid_flags) == nfs_invalid_flags)
+			return -EINVAL;
+		break;
+	default:
+		break;
+	}
+	return generic_file_fcntl(fd, cmd, arg, filp);
+}
+
 /*
  * Open file
  */
@@ -78,6 +103,9 @@ nfs_file_open(struct inode *inode, struct file *filp)
 	int (*open)(struct inode *, struct file *);
 	int res = 0;
 
+	if ((filp->f_flags & nfs_invalid_flags) == nfs_invalid_flags)
+		return -EINVAL;
+
 	lock_kernel();
 	/* Do NFSv4 open() call */
 	if ((open = server->rpc_ops->file_open) != NULL)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3e71560374c0..02976f7c9f47 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -621,6 +621,9 @@ extern struct list_head file_lock_list;
 
 #include <linux/fcntl.h>
 
+extern long generic_file_fcntl(int fd, unsigned int cmd,
+				unsigned long arg, struct file *filp);
+
 extern int fcntl_getlk(struct file *, struct flock __user *);
 extern int fcntl_setlk(struct file *, unsigned int, struct flock __user *);
 
@@ -830,6 +833,8 @@ struct file_operations {
 	ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void __user *);
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+	long (*fcntl)(int fd, unsigned int cmd,
+			unsigned long arg, struct file *filp);
 };
 
 struct inode_operations {
author	Andrew Morton <akpm@osdl.org>	2004-04-11 22:59:45 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2004-04-11 22:59:45 -0700
commit	cea39746deca7ce8b10f21e4a4b3e96c33381e2e (patch)
tree	673297c70a7c5bc0d22d035d85bb40df16b24621
parent	3f66b056e1b56427eec0b26e0a20ac08fb8a6dc9 (diff)