From abfd0ec555be01df13a9e38ba543c2ed377df7cf Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 17 Apr 2004 03:23:39 -0700
Subject: [PATCH] dm: Fix 64/32 bit ioctl problems.

From: Kevin Corry <kevcorry@us.ibm.com>

Fix 64/32 bit ioctl problems.
---
 include/linux/compat_ioctl.h | 13 +++++++++++++
 include/linux/dm-ioctl.h     | 28 ++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index be4c3e031f1e..278c533b6994 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -123,6 +123,19 @@ COMPATIBLE_IOCTL(STOP_ARRAY)
 COMPATIBLE_IOCTL(STOP_ARRAY_RO)
 COMPATIBLE_IOCTL(RESTART_ARRAY_RW)
 /* DM */
+COMPATIBLE_IOCTL(DM_VERSION_32)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES_32)
+COMPATIBLE_IOCTL(DM_DEV_CREATE_32)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE_32)
+COMPATIBLE_IOCTL(DM_DEV_RENAME_32)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND_32)
+COMPATIBLE_IOCTL(DM_DEV_STATUS_32)
+COMPATIBLE_IOCTL(DM_DEV_WAIT_32)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD_32)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR_32)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS_32)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS_32)
+COMPATIBLE_IOCTL(DM_LIST_VERSIONS_32)
 COMPATIBLE_IOCTL(DM_VERSION)
 COMPATIBLE_IOCTL(DM_LIST_DEVICES)
 COMPATIBLE_IOCTL(DM_DEV_CREATE)
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index cd781d795f8d..036f98bb840a 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -200,6 +200,34 @@ enum {
 	DM_LIST_VERSIONS_CMD,
 };
 
+/*
+ * The dm_ioctl struct passed into the ioctl is just the header
+ * on a larger chunk of memory.  On x86-64 and other 64-bit
+ * architectures the dm_ioctl struct will be padded to an 8 byte
+ * boundary so the size will be different, which would change the
+ * ioctl code - yes I really messed up.  This hack forces these
+ * architectures to have the correct ioctl code.
+ */
+#ifdef CONFIG_COMPAT
+typedef char ioctl_struct[308];
+#define DM_VERSION_32       _IOWR(DM_IOCTL, DM_VERSION_CMD, ioctl_struct)
+#define DM_REMOVE_ALL_32    _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, ioctl_struct)
+#define DM_LIST_DEVICES_32  _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, ioctl_struct)
+
+#define DM_DEV_CREATE_32    _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, ioctl_struct)
+#define DM_DEV_REMOVE_32    _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, ioctl_struct)
+#define DM_DEV_RENAME_32    _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, ioctl_struct)
+#define DM_DEV_SUSPEND_32   _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, ioctl_struct)
+#define DM_DEV_STATUS_32    _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, ioctl_struct)
+#define DM_DEV_WAIT_32      _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, ioctl_struct)
+
+#define DM_TABLE_LOAD_32    _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, ioctl_struct)
+#define DM_TABLE_CLEAR_32   _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, ioctl_struct)
+#define DM_TABLE_DEPS_32    _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, ioctl_struct)
+#define DM_TABLE_STATUS_32  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, ioctl_struct)
+#define DM_LIST_VERSIONS_32 _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, ioctl_struct)
+#endif
+
 #define DM_IOCTL 0xfd
 
 #define DM_VERSION       _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
-- 
cgit v1.2.3


From e79ed99a7914f7c0792243dc7cd1a8af9baed58e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 17 Apr 2004 03:24:54 -0700
Subject: [PATCH] dm: fix a comment

From: Kevin Corry <kevcorry@us.ibm.com>

Clarify the comment regarding the "next" field in struct dm_target_spec.  The
"next" field has different behavior if you're performing a DM_TABLE_STATUS
command than it does if you're performing a DM_TABLE_LOAD command.

See populate_table() and retrieve_status() in drivers/md/dm-ioctl.c for more
details on how this field is used.
---
 include/linux/dm-ioctl.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 036f98bb840a..03f99db7ad7b 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -129,8 +129,14 @@ struct dm_target_spec {
 	int32_t status;		/* used when reading from kernel only */
 
 	/*
-	 * Offset in bytes (from the start of this struct) to
-	 * next target_spec.
+	 * Location of the next dm_target_spec.
+	 * - When specifying targets on a DM_TABLE_LOAD command, this value is
+	 *   the number of bytes from the start of the "current" dm_target_spec
+	 *   to the start of the "next" dm_target_spec.
+	 * - When retrieving targets on a DM_TABLE_STATUS command, this value
+	 *   is the number of bytes from the start of the first dm_target_spec
+	 *   (that follows the dm_ioctl struct) to the start of the "next"
+	 *   dm_target_spec.
 	 */
 	uint32_t next;
 
-- 
cgit v1.2.3


From 599692fc635da0e613702fbde2c1ccbd427c6d2f Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 17 Apr 2004 03:25:57 -0700
Subject: [PATCH] kNFSdv4: Keep state to allow replays for 'close' to work.

From: NeilBrown <neilb@cse.unsw.edu.au>

From: "J. Bruce Fields" <bfields@fieldses.org>

From: Andros: Idea is to keep around a list of openowners recently released
by closes, and make sure they stay around long enough so that replays still
work.
---
 fs/nfsd/nfs4state.c        | 149 ++++++++++++++++++++++++---------------------
 include/linux/nfsd/state.h |   6 ++
 2 files changed, 84 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 13ca74de2a21..7f1abbc5aa90 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -136,12 +136,16 @@ static void release_file(struct nfs4_file *fp);
  *
  * client_lru holds client queue ordered by nfs4_client.cl_time
  * for lease renewal.
+ *
+ * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
+ * for last close replay.
  */
 static struct list_head	conf_id_hashtbl[CLIENT_HASH_SIZE];
 static struct list_head	conf_str_hashtbl[CLIENT_HASH_SIZE];
 static struct list_head	unconf_str_hashtbl[CLIENT_HASH_SIZE];
 static struct list_head	unconf_id_hashtbl[CLIENT_HASH_SIZE];
 static struct list_head client_lru;
+static struct list_head close_lru;
 
 static inline void
 renew_client(struct nfs4_client *clp)
@@ -376,7 +380,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 	unsigned int 		strhashval;
 	struct nfs4_client *	conf, * unconf, * new, * clp;
 	int 			status;
-	struct list_head *pos, *next;
 	
 	status = nfserr_inval;
 	if (!check_name(clname))
@@ -391,8 +394,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 
 	conf = NULL;
 	nfs4_lock_state();
-	list_for_each_safe(pos, next, &conf_str_hashtbl[strhashval]) {
-		clp = list_entry(pos, struct nfs4_client, cl_strhash);
+	list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) {
 		if (!cmp_name(&clp->cl_name, &clname))
 			continue;
 		/* 
@@ -422,8 +424,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		break;
 	}
 	unconf = NULL;
-	list_for_each_safe(pos, next, &unconf_str_hashtbl[strhashval]) {
-		clp = list_entry(pos, struct nfs4_client, cl_strhash);
+	list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
 		if (!cmp_name(&clp->cl_name, &clname))
 			continue;
 		/* cl_name match from a previous SETCLIENTID operation */
@@ -549,7 +550,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	struct nfs4_client *clp, *conf = NULL, *unconf = NULL;
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
 	clientid_t * clid = &setclientid_confirm->sc_clientid;
-	struct list_head *pos, *next;
 	int status;
 
 	status = nfserr_stale_clientid;
@@ -562,8 +562,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 
 	idhashval = clientid_hashval(clid->cl_id);
 	nfs4_lock_state();
-	list_for_each_safe(pos, next, &conf_id_hashtbl[idhashval]) {
-		clp = list_entry(pos, struct nfs4_client, cl_idhash);
+	list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
 		if (!cmp_clid(&clp->cl_clientid, clid))
 			continue;
 
@@ -582,8 +581,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		conf = clp;
 		break;
 	}
-	list_for_each_safe(pos, next, &unconf_id_hashtbl[idhashval]) {
-		clp = list_entry(pos, struct nfs4_client, cl_idhash);
+	list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
 		if (!cmp_clid(&clp->cl_clientid, clid))
 			continue;
 		status = nfserr_inval;
@@ -774,6 +772,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	INIT_LIST_HEAD(&sop->so_perclient);
 	INIT_LIST_HEAD(&sop->so_perfilestate);
 	INIT_LIST_HEAD(&sop->so_perlockowner);  /* not used */
+	INIT_LIST_HEAD(&sop->so_close_lru);
+	sop->so_time = 0;
 	list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
 	list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
 	list_add(&sop->so_perclient, &clp->cl_perclient);
@@ -814,6 +814,7 @@ release_stateowner(struct nfs4_stateowner *sop)
 	list_del(&sop->so_strhash);
 	list_del(&sop->so_perclient);
 	list_del(&sop->so_perlockowner);
+	list_del(&sop->so_close_lru);
 	del_perclient++;
 	while (!list_empty(&sop->so_perfilestate)) {
 		stp = list_entry(sop->so_perfilestate.next, 
@@ -881,6 +882,19 @@ release_file(struct nfs4_file *fp)
 	kfree(fp);
 }	
 
+void
+move_to_close_lru(struct nfs4_stateowner *sop)
+{
+	dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
+	/* unhash the stateowner everywhere except its so_perclient link */
+	list_del_init(&sop->so_perlockowner);
+	list_del_init(&sop->so_strhash);
+	list_del_init(&sop->so_idhash);
+	/* stamp it with the current time and append it to close_lru */
+	sop->so_time = get_seconds();
+	list_add_tail(&sop->so_close_lru, &close_lru);
+}
+
 void
 release_state_owner(struct nfs4_stateid *stp, struct nfs4_stateowner **sopp,
 		int flag)
@@ -890,16 +904,13 @@ release_state_owner(struct nfs4_stateid *stp, struct nfs4_stateowner **sopp,
 
 	dprintk("NFSD: release_state_owner\n");
 	release_stateid(stp, flag);
-	/*
-	 * release unused nfs4_stateowners.
-	 * XXX will need to be placed  on an  open_stateid_lru list to be
+
+	/* place unused nfs4_stateowners on so_close_lru list to be
 	 * released by the laundromat service after the lease period
 	 * to enable us to handle CLOSE replay
 	 */
-	if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) {
-		release_stateowner(sop);
-		*sopp = NULL;
-	}
+	if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
+		move_to_close_lru(sop);
 	/* unused nfs4_file's are releseed. XXX slab cache? */
 	if (list_empty(&fp->fi_perfile)) {
 		release_file(fp);
@@ -916,11 +927,9 @@ cmp_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, clientid_t
 /* search ownerstr_hashtbl[] for owner */
 static int
 find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, struct nfs4_stateowner **op) {
-	struct list_head *pos, *next;
 	struct nfs4_stateowner *local = NULL;
 
-	list_for_each_safe(pos, next, &ownerstr_hashtbl[hashval]) {
-		local = list_entry(pos, struct nfs4_stateowner, so_strhash);
+	list_for_each_entry(local, &ownerstr_hashtbl[hashval], so_strhash) {
 		if(!cmp_owner_str(local, &open->op_owner, &open->op_clientid)) 
 			continue;
 		*op = local;
@@ -933,12 +942,10 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, struct nf
 static int
 verify_clientid(struct nfs4_client **client, clientid_t *clid) {
 
-	struct list_head *pos, *next;
 	struct nfs4_client *clp;
 	unsigned int idhashval = clientid_hashval(clid->cl_id);
 
-	list_for_each_safe(pos, next, &conf_id_hashtbl[idhashval]) {
-		clp = list_entry(pos, struct nfs4_client, cl_idhash);
+	list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
 		if (!cmp_clid(&clp->cl_clientid, clid))
 			continue;
 		*client = clp;
@@ -951,11 +958,9 @@ verify_clientid(struct nfs4_client **client, clientid_t *clid) {
 /* search file_hashtbl[] for file */
 static int
 find_file(unsigned int hashval, struct inode *ino, struct nfs4_file **fp) {
-	struct list_head *pos, *next;
 	struct nfs4_file *local = NULL;
 
-	list_for_each_safe(pos, next, &file_hashtbl[hashval]) {
-		local = list_entry(pos, struct nfs4_file, fi_hash);
+	list_for_each_entry(local, &file_hashtbl[hashval], fi_hash) {
 		if (local->fi_inode == ino) {
 			*fp = local;
 			return(1);
@@ -1011,15 +1016,13 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	unsigned int fi_hashval;
 	struct nfs4_file *fp;
 	struct nfs4_stateid *stp;
-	struct list_head *pos, *next;
 
 	dprintk("NFSD: nfs4_share_conflict\n");
 
 	fi_hashval = file_hashval(ino);
 	if (find_file(fi_hashval, ino, &fp)) {
 	/* Search for conflicting share reservations */
-		list_for_each_safe(pos, next, &fp->fi_perfile) {
-			stp = list_entry(pos, struct nfs4_stateid, st_perfile);
+		list_for_each_entry(stp, &fp->fi_perfile, st_perfile) {
 			if (test_bit(deny_type, &stp->st_deny_bmap) ||
 			    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
 				return nfserr_share_denied;
@@ -1154,7 +1157,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	struct nfs4_file *fp;
 	struct inode *ino;
 	unsigned int fi_hashval;
-	struct list_head *pos, *next;
 	struct nfs4_stateid *stq, *stp = NULL;
 	int status;
 
@@ -1173,8 +1175,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	if (find_file(fi_hashval, ino, &fp)) {
 		/* Search for conflicting share reservations */
 		status = nfserr_share_denied;
-		list_for_each_safe(pos, next, &fp->fi_perfile) {
-		stq = list_entry(pos, struct nfs4_stateid, st_perfile);
+		list_for_each_entry(stq, &fp->fi_perfile, st_perfile) {
 			if(stq->st_stateowner == sop) {
 				stp = stq;
 				continue;
@@ -1274,7 +1275,6 @@ int
 nfsd4_renew(clientid_t *clid)
 {
 	struct nfs4_client *clp;
-	struct list_head *pos, *next;
 	unsigned int idhashval;
 	int status;
 
@@ -1286,15 +1286,13 @@ nfsd4_renew(clientid_t *clid)
 		goto out;
 	status = nfs_ok;
 	idhashval = clientid_hashval(clid->cl_id);
-	list_for_each_safe(pos, next, &conf_id_hashtbl[idhashval]) {
-		clp = list_entry(pos, struct nfs4_client, cl_idhash);
+	list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
 		if (!cmp_clid(&clp->cl_clientid, clid))
 			continue;
 		renew_client(clp);
 		goto out;
 	}
-	list_for_each_safe(pos, next, &unconf_id_hashtbl[idhashval]) {
-		clp = list_entry(pos, struct nfs4_client, cl_idhash);
+	list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
 		if (!cmp_clid(&clp->cl_clientid, clid))
 			continue;
 		renew_client(clp);
@@ -1316,9 +1314,11 @@ time_t
 nfs4_laundromat(void)
 {
 	struct nfs4_client *clp;
+	struct nfs4_stateowner *sop;
 	struct list_head *pos, *next;
 	time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
-	time_t t, return_val = NFSD_LEASE_TIME;
+	time_t t, clientid_val = NFSD_LEASE_TIME;
+	time_t u, close_val = NFSD_LEASE_TIME;
 
 	nfs4_lock_state();
 
@@ -1327,18 +1327,30 @@ nfs4_laundromat(void)
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
 			t = clp->cl_time - cutoff;
-			if (return_val > t)
-				return_val = t;
+			if (clientid_val > t)
+				clientid_val = t;
 			break;
 		}
 		dprintk("NFSD: purging unused client (clientid %08x)\n",
 			clp->cl_clientid.cl_id);
 		expire_client(clp);
 	}
-	if (return_val < NFSD_LAUNDROMAT_MINTIMEOUT)
-		return_val = NFSD_LAUNDROMAT_MINTIMEOUT;
+	list_for_each_safe(pos, next, &close_lru) {
+		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
+		if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
+			u = sop->so_time - cutoff;
+			if (close_val > u)
+				close_val = u;
+			break;
+		}
+		dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
+			sop->so_id);
+		release_stateowner(sop);
+	}
+	if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
+		clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
 	nfs4_unlock_state();
-	return return_val;
+	return clientid_val;
 }
 
 void
@@ -1351,17 +1363,19 @@ laundromat_main(void *not_used)
 	schedule_delayed_work(&laundromat_work, t*HZ);
 }
 
-/* search ownerid_hashtbl[] for stateid owner (stateid->si_stateownerid) */
+/* search close_lru for the stateid owner (stateid->si_stateownerid);
+ * the list is only searched when CLOSE_STATE is set in flags
+ */
 struct nfs4_stateowner *
-find_openstateowner_id(u32 st_id) {
-	struct list_head *pos, *next;
+find_openstateowner_id(u32 st_id, int flags) {
 	struct nfs4_stateowner *local = NULL;
-	unsigned int hashval = ownerid_hashval(st_id);
 
-	list_for_each_safe(pos, next, &ownerid_hashtbl[hashval]) {
-		local = list_entry(pos, struct nfs4_stateowner, so_idhash);
-		if(local->so_id == st_id)
-			return local;
+	dprintk("NFSD: find_openstateowner_id %d\n", st_id);
+	if (flags & CLOSE_STATE) {
+		list_for_each_entry(local, &close_lru, so_close_lru) {
+			if(local->so_id == st_id)
+				return local;
+		}
 	}
 	return NULL;
 }
@@ -1547,11 +1561,12 @@ no_nfs4_stateid:
 	* starting by trying to look up the stateowner.
 	* If stateowner is not found - stateid is bad.
 	*/
-	if (!(sop = find_openstateowner_id(stateid->si_stateownerid))) {
+	if (!(sop = find_openstateowner_id(stateid->si_stateownerid, flags))) {
 		printk("NFSD: preprocess_seqid_op: no stateowner or nfs4_stateid!\n");
 		status = nfserr_bad_stateid;
 		goto out;
 	}
+	*sopp = sop;
 
 check_replay:
 	if (seqid == sop->so_seqid) {
@@ -1690,9 +1705,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_clos
 			current_fh->fh_dentry->d_name.name);
 
 	nfs4_lock_state();
+	/* check close_lru for replay */
 	if ((status = nfs4_preprocess_seqid_op(current_fh, close->cl_seqid, 
 					&close->cl_stateid, 
-					CHECK_FH | OPEN_STATE, 
+					CHECK_FH | OPEN_STATE | CLOSE_STATE,
 					&close->cl_stateowner, &stp, NULL)))
 		goto out; 
 	/*
@@ -1729,7 +1745,6 @@ static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
 struct nfs4_stateid *
 find_stateid(stateid_t *stid, int flags)
 {
-	struct list_head *pos, *next;
 	struct nfs4_stateid *local = NULL;
 	u32 st_id = stid->si_stateownerid;
 	u32 f_id = stid->si_fileid;
@@ -1738,8 +1753,7 @@ find_stateid(stateid_t *stid, int flags)
 	dprintk("NFSD: find_stateid flags 0x%x\n",flags);
 	if ((flags & LOCK_STATE) || (flags & RDWR_STATE)) {
 		hashval = stateid_hashval(st_id, f_id);
-		list_for_each_safe(pos, next, &lockstateid_hashtbl[hashval]) {
-			local = list_entry(pos, struct nfs4_stateid, st_hash);
+		list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
 			if((local->st_stateid.si_stateownerid == st_id) &&
 			   (local->st_stateid.si_fileid == f_id))
 				return local;
@@ -1747,8 +1761,7 @@ find_stateid(stateid_t *stid, int flags)
 	} 
 	if ((flags & OPEN_STATE) || (flags & RDWR_STATE)) {
 		hashval = stateid_hashval(st_id, f_id);
-		list_for_each_safe(pos, next, &stateid_hashtbl[hashval]) {
-			local = list_entry(pos, struct nfs4_stateid, st_hash);
+		list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
 			if((local->st_stateid.si_stateownerid == st_id) &&
 			   (local->st_stateid.si_fileid == f_id))
 				return local;
@@ -1779,14 +1792,12 @@ nfs4_transform_lock_offset(struct file_lock *lock)
 int
 nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval)
 {
-	struct list_head *pos, *next;
 	struct nfs4_stateowner *local = NULL;
 	int status = 0;
 			        
 	if (hashval >= LOCK_HASH_SIZE)
 		goto out;
-	list_for_each_safe(pos, next, &lock_ownerid_hashtbl[hashval]) {
-		local = list_entry(pos, struct nfs4_stateowner, so_idhash);
+	list_for_each_entry(local, &lock_ownerid_hashtbl[hashval], so_idhash) {
 		if (local == sop) {
 			status = 1;
 			goto out;
@@ -1817,11 +1828,9 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
 
 static int
 find_lockstateowner_str(unsigned int hashval, struct xdr_netobj *owner, clientid_t *clid, struct nfs4_stateowner **op) {
-	struct list_head *pos, *next;
 	struct nfs4_stateowner *local = NULL;
 
-	list_for_each_safe(pos, next, &lock_ownerstr_hashtbl[hashval]) {
-		local = list_entry(pos, struct nfs4_stateowner, so_strhash);
+	list_for_each_entry(local, &lock_ownerstr_hashtbl[hashval], so_strhash) {
 		if(!cmp_owner_str(local, owner, clid)) 
 			continue;
 		*op = local;
@@ -1854,6 +1863,8 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	INIT_LIST_HEAD(&sop->so_perclient);
 	INIT_LIST_HEAD(&sop->so_perfilestate);
 	INIT_LIST_HEAD(&sop->so_perlockowner);
+	INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
+	sop->so_time = 0;
 	list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
 	list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
 	list_add(&sop->so_perclient, &clp->cl_perclient);
@@ -2265,7 +2276,6 @@ int
 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
 {
 	clientid_t *clid = &rlockowner->rl_clientid;
-	struct list_head *pos, *next;
 	struct nfs4_stateowner *local = NULL;
 	struct xdr_netobj *owner = &rlockowner->rl_owner;
 	int status, i;
@@ -2286,9 +2296,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
 	/* find the lockowner */
         status = nfs_ok;
 	for (i=0; i < LOCK_HASH_SIZE; i++) {
-		list_for_each_safe(pos, next, &lock_ownerstr_hashtbl[i]) {
-			local = list_entry(pos, struct nfs4_stateowner,
-						so_strhash);
+		list_for_each_entry(local, &lock_ownerstr_hashtbl[i], so_strhash) {
 			if(cmp_owner_str(local, owner, clid))
 				break;
 		}
@@ -2299,9 +2307,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
 		/* check for any locks held by any stateid associated with the
 		 * (lock) stateowner */
 		status = nfserr_locks_held;
-		list_for_each_safe(pos, next, &local->so_perfilestate) {
-			stp = list_entry(pos, struct nfs4_stateid,
-					                    st_perfilestate);
+		list_for_each_entry(stp, &local->so_perfilestate, st_perfilestate) {
 			if(stp->st_vfs_set) {
 				if (check_for_locks(&stp->st_vfs_file, local))
 					goto out;
@@ -2351,6 +2357,7 @@ nfs4_state_init(void)
 	memset(&zerostateid, 0, sizeof(stateid_t));
 	memset(&onestateid, ~0, sizeof(stateid_t));
 
+	INIT_LIST_HEAD(&close_lru);
 	INIT_LIST_HEAD(&client_lru);
 	init_MUTEX(&client_sema);
 	boot_time = get_seconds();
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 4598b9756668..d68fad3edf1d 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -132,6 +132,9 @@ struct nfs4_replay {
 *         release a stateowner.
 *    so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
 *         close is called to reap associated byte-range locks
+*    so_close_lru: (open) stateowner is placed on this list instead of being
+*         reaped (when so_perfilestate is empty) to hold the last close replay.
+*         reaped by laundromat thread after lease period.
 */
 struct nfs4_stateowner {
 	struct list_head        so_idhash;   /* hash by so_id */
@@ -139,6 +142,8 @@ struct nfs4_stateowner {
 	struct list_head        so_perclient; /* nfs4_client->cl_perclient */
 	struct list_head        so_perfilestate; /* list: nfs4_stateid */
 	struct list_head        so_perlockowner; /* nfs4_stateid->st_perlockowner */
+	struct list_head	so_close_lru; /* tail queue */
+	time_t			so_time; /* time of placement on so_close_lru */
 	int			so_is_open_owner; /* 1=openowner,0=lockowner */
 	u32                     so_id;
 	struct nfs4_client *    so_client;
@@ -194,6 +199,7 @@ struct nfs4_stateid {
 #define OPEN_STATE              0x00000004
 #define LOCK_STATE              0x00000008
 #define RDWR_STATE              0x00000010
+#define CLOSE_STATE             0x00000020
 
 #define seqid_mutating_err(err)                       \
 	(((err) != nfserr_stale_clientid) &&    \
-- 
cgit v1.2.3


From cba16655c63f8ed2f6d308d423ce0ce47b91c892 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 17 Apr 2004 03:26:55 -0700
Subject: [PATCH] kNFSdv4: Implement server-side reboot recovery (mostly)

From: NeilBrown <neilb@cse.unsw.edu.au>

From: "J. Bruce Fields" <bfields@fieldses.org>

From: Andros: Implement server-side reboot recovery (server now handles
open and lock reclaims).  Not completely to spec: we don't yet store the
state in stable storage that would be required to recover correctly in
certain situations.
---
 fs/nfsd/nfs4proc.c         | 102 ++++++++++++++++++++++++++++++++++++++-------
 fs/nfsd/nfs4state.c        |  53 ++++++++++++++++++++---
 include/linux/nfsd/nfsd.h  |   3 ++
 include/linux/nfsd/state.h |   2 +
 4 files changed, 140 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7fad51aa8f19..d8765a09327f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -65,11 +65,32 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
 	*dst = *src;
 }
 
+/* Permission check for OPEN: truncation requires write share access;
+ * current_fh must then pass fh_verify() as a regular file. */
+static int
+do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+	int accmode;
+
+	if (open->op_truncate &&
+		!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+		return nfserr_inval;
+
+	accmode = MAY_NOP;
+	if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
+		accmode = MAY_READ;
+	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+		accmode |= (MAY_WRITE | MAY_TRUNC);
+	accmode |= MAY_OWNER_OVERRIDE;
+
+	return fh_verify(rqstp, current_fh, S_IFREG, accmode);
+}
+
 static int
 do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
 {
 	struct svc_fh resfh;
-	int accmode, status;
+	int status;
 
 	fh_init(&resfh, NFS4_FHSIZE);
 	open->op_truncate = 0;
@@ -92,6 +113,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 
 	if (!status) {
 		set_change_info(&open->op_cinfo, current_fh);
+
+		/* set replay cache */
 		fh_dup2(current_fh, &resfh);
 		/* XXXJBF: keep a saved svc_fh struct instead?? */
 		open->op_stateowner->so_replay.rp_openfh_len =
@@ -100,19 +123,41 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 				&resfh.fh_handle.fh_base,
 				resfh.fh_handle.fh_size);
 
-		accmode = MAY_NOP;
-		if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
-			accmode = MAY_READ;
-		if (open->op_share_deny & NFS4_SHARE_ACCESS_WRITE)
-			accmode |= (MAY_WRITE | MAY_TRUNC);
-		accmode |= MAY_OWNER_OVERRIDE;
-		status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
+		status = do_open_permission(rqstp, current_fh, open);
 	}
 
 	fh_put(&resfh);
 	return status;
 }
 
+/* Open by filehandle (used for CLAIM_PREVIOUS): current_fh already names
+ * the file, so set op_cinfo, the replay cache and op_truncate, then check. */
+static int
+do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+	dprintk("NFSD: do_open_fhandle\n");
+
+	/* the target directory is unknown, so no change info can be
+	 * reported: hand back an all-zero op_cinfo
+	 */
+	memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info));
+
+	/* remember the filehandle in the stateowner's replay cache */
+	open->op_stateowner->so_replay.rp_openfh_len = current_fh->fh_handle.fh_size;
+	memcpy(open->op_stateowner->so_replay.rp_openfh,
+		&current_fh->fh_handle.fh_base,
+		current_fh->fh_handle.fh_size);
+
+	/* a size attribute of zero asks for truncation after the open */
+	if ((open->op_iattr.ia_valid & ATTR_SIZE) && !open->op_iattr.ia_size)
+		open->op_truncate = 1;
+	else
+		open->op_truncate = 0;
+
+	return do_open_permission(rqstp, current_fh, open);
+}
+
+
 /*
  * nfs4_unlock_state() called in encode
  */
@@ -124,6 +169,13 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 		(int)open->op_fname.len, open->op_fname.data,
 		open->op_stateowner);
 
+	if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+		return nfserr_grace;
+
+	if (nfs4_in_no_grace() &&
+		           open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+		return nfserr_no_grace;
+
 	/* This check required by spec. */
 	if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
 		return nfserr_inval;
@@ -148,16 +200,30 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 	}
 	if (status)
 		return status;
+	if (open->op_claim_type == NFS4_OPEN_CLAIM_NULL) {
 	/*
 	 * This block of code will (1) set CURRENT_FH to the file being opened,
 	 * creating it if necessary, (2) set open->op_cinfo, 
 	 * (3) set open->op_truncate if the file is to be truncated 
 	 * after opening, (4) do permission checking.
 	 */
-	status = do_open_lookup(rqstp, current_fh, open);
-	if (status)
-		return status;
-
+		status = do_open_lookup(rqstp, current_fh, open);
+		if (status)
+			return status;
+	} else if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) {
+	/*
+	* The CURRENT_FH is already set to the file being opened. This
+	* block of code will (1) set open->op_cinfo, (2) set
+	* open->op_truncate if the file is to be truncated after opening,
+	* (3) do permission checking.
+	*/
+		status = do_open_fhandle(rqstp, current_fh, open);
+		if (status)
+			return status;
+	} else {
+		printk("NFSD: unsupported OPEN claim type\n");
+		return nfserr_inval;
+	}
 	/*
 	 * nfsd4_process_open2() does the actual opening of the file.  If
 	 * successful, it (1) truncates the file if open->op_truncate was
@@ -414,6 +480,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read
 	int status;
 
 	/* no need to check permission - this will be done in nfsd_read() */
+	if (nfs4_in_grace())
+		return nfserr_grace;
 
 	if (read->rd_offset >= OFFSET_MAX)
 		return nfserr_inval;
@@ -537,10 +605,13 @@ static inline int
 nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr)
 {
 	struct nfs4_stateid *stp;
-	int status = nfserr_nofilehandle;
+	int status = nfs_ok;
+
+	if (nfs4_in_grace())
+		return nfserr_grace;
 
 	if (!current_fh->fh_dentry)
-		goto out;
+		return nfserr_nofilehandle;
 
 	status = nfs_ok;
 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
@@ -579,6 +650,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
 	u32 *p;
 	int status = nfs_ok;
 
+	if (nfs4_in_grace())
+		return nfserr_grace;
+
 	/* no need to check permission - this will be done in nfsd_write() */
 
 	if (write->wr_offset >= OFFSET_MAX)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 5ded0c0f70a3..73a8944ad96e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -52,6 +52,7 @@
 
 /* Globals */
 time_t boot_time;
+static time_t grace_end = 0;
 static u32 current_clientid = 1;
 static u32 current_ownerid;
 static u32 current_fileid;
@@ -1090,7 +1091,7 @@ nfsd4_process_open1(struct nfsd4_open *open)
 
 	status = nfserr_stale_clientid;
 	if (STALE_CLIENTID(&open->op_clientid))
-		goto out;
+		return status;
 
 	strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner);
 	if (find_openstateowner_str(strhashval, open, &sop)) {
@@ -1111,7 +1112,7 @@ nfsd4_process_open1(struct nfsd4_open *open)
 			}
 			/* replay: indicate to calling function */
 			status = NFSERR_REPLAY_ME;
-			goto out;
+			return status;
 		}
 		if (sop->so_confirmed) {
 			if (open->op_seqid == sop->so_seqid + 1) { 
@@ -1149,6 +1150,8 @@ instantiate_new_owner:
 renew:
 	renew_client(sop->so_client);
 out:
+	if (status && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+		status = nfserr_reclaim_bad;
 	return status;
 }
 /*
@@ -1159,7 +1162,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 {
 	struct iattr iattr;
 	struct nfs4_stateowner *sop = open->op_stateowner;
-	struct nfs4_file *fp;
+	struct nfs4_file *fp = NULL;
 	struct inode *ino;
 	unsigned int fi_hashval;
 	struct nfs4_stateid *stq, *stp = NULL;
@@ -1167,7 +1170,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 
 	status = nfserr_resource;
 	if (!sop)
-		goto out;
+		return status;
 
 	ino = current_fh->fh_dentry->d_inode;
 
@@ -1258,6 +1261,17 @@ out:
 	if (fp && list_empty(&fp->fi_perfile))
 		release_file(fp);
 
+	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) {
+		if (status)
+			status = nfserr_reclaim_bad;
+		else {
+		/* successful reclaim. so_seqid is decremented because
+		* it will be bumped in encode_open
+		*/
+			open->op_stateowner->so_confirmed = 1;
+			open->op_stateowner->so_seqid--;
+		}
+	}
 	/*
 	* To finish the open response, we just need to set the rflags.
 	*/
@@ -1270,6 +1284,7 @@ out_free:
 	kfree(stp);
 	goto out;
 }
+
 static struct work_struct laundromat_work;
 static void laundromat_main(void *);
 static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
@@ -1954,6 +1969,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 		(long long) lock->lk_offset,
 		(long long) lock->lk_length);
 
+	if (nfs4_in_grace() && !lock->lk_reclaim)
+		return nfserr_grace;
+	if (nfs4_in_no_grace() && lock->lk_reclaim)
+		return nfserr_no_grace;
+
 	if (check_lock_length(lock->lk_offset, lock->lk_length))
 		 return nfserr_inval;
 
@@ -1983,8 +2003,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 		                        CHECK_FH | OPEN_STATE,
 		                        &open_sop, &open_stp,
 					&lock->v.new.clientid);
-		if (status)
+		if (status) {
+			if (lock->lk_reclaim)
+				status = nfserr_reclaim_bad;
 			goto out;
+		}
 		/* create lockowner and lock stateid */
 		fp = open_stp->st_file;
 		strhashval = lock_ownerstr_hashval(fp->fi_inode, 
@@ -2119,6 +2142,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	unsigned int strhashval;
 	int status;
 
+	if (nfs4_in_grace())
+		return nfserr_grace;
+
 	if (check_lock_length(lockt->lt_offset, lockt->lt_length))
 		 return nfserr_inval;
 
@@ -2343,6 +2369,7 @@ void
 nfs4_state_init(void)
 {
 	int i;
+	time_t start = get_seconds();
 
 	if (nfs4_init)
 		return;
@@ -2373,13 +2400,27 @@ nfs4_state_init(void)
 	INIT_LIST_HEAD(&close_lru);
 	INIT_LIST_HEAD(&client_lru);
 	init_MUTEX(&client_sema);
-	boot_time = get_seconds();
+	boot_time = start;
+	grace_end = start + NFSD_LEASE_TIME;
 	INIT_WORK(&laundromat_work,laundromat_main, NULL);
 	schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ);
 	nfs4_init = 1;
 
 }
 
+int
+nfs4_in_grace(void)
+{
+	return time_before(get_seconds(), (unsigned long)grace_end);
+}
+
+int
+nfs4_in_no_grace(void)
+{
+	return (grace_end < get_seconds());
+}
+
+
 static void
 __nfs4_state_shutdown(void)
 {
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 418356558209..cd9c59cf93fd 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -196,6 +196,9 @@ void		nfsd_lockd_shutdown(void);
 #define	nfserr_openmode		__constant_htonl(NFSERR_OPENMODE)
 #define	nfserr_locks_held	__constant_htonl(NFSERR_LOCKS_HELD)
 #define	nfserr_op_illegal	__constant_htonl(NFSERR_OP_ILLEGAL)
+#define	nfserr_grace		__constant_htonl(NFSERR_GRACE)
+#define	nfserr_no_grace		__constant_htonl(NFSERR_NO_GRACE)
+#define	nfserr_reclaim_bad	__constant_htonl(NFSERR_RECLAIM_BAD)
 
 /* error codes for internal use */
 /* if a request fails due to kmalloc failure, it gets dropped.
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index d68fad3edf1d..06da18506122 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -215,4 +215,6 @@ extern int nfs4_share_conflict(struct svc_fh *current_fh,
 		unsigned int deny_type);
 extern void nfs4_lock_state(void);
 extern void nfs4_unlock_state(void);
+extern int nfs4_in_grace(void);
+extern int nfs4_in_no_grace(void);
 #endif   /* NFSD4_STATE_H */
-- 
cgit v1.2.3


From 01d86f023be81c6b96cca5a3db2383469b0e7a41 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 17 Apr 2004 03:27:06 -0700
Subject: [PATCH] kill submit_{bh,bio} return value

From: Jeff Garzik <jgarzik@pobox.com>

Nobody ever checks the return value of submit_bh(), and submit_bh() is the
only caller that checks the submit_bio() return value.

This changes the kernel I/O submission path -- a fast path -- so this
cleanup is also a microoptimization.
---
 drivers/block/ll_rw_blk.c   | 3 +--
 fs/buffer.c                 | 4 ++--
 include/linux/buffer_head.h | 2 +-
 include/linux/fs.h          | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 5ee752d64f4a..85dac0809fb2 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -2429,7 +2429,7 @@ EXPORT_SYMBOL(generic_make_request);
  * interfaces, @bio must be presetup and ready for I/O.
  *
  */
-int submit_bio(int rw, struct bio *bio)
+void submit_bio(int rw, struct bio *bio)
 {
 	int count = bio_sectors(bio);
 
@@ -2451,7 +2451,6 @@ int submit_bio(int rw, struct bio *bio)
 	}
 
 	generic_make_request(bio);
-	return 1;
 }
 
 EXPORT_SYMBOL(submit_bio);
diff --git a/fs/buffer.c b/fs/buffer.c
index 99f1ce112ea9..b06f2059a485 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2684,7 +2684,7 @@ static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
 	return 0;
 }
 
-int submit_bh(int rw, struct buffer_head * bh)
+void submit_bh(int rw, struct buffer_head * bh)
 {
 	struct bio *bio;
 
@@ -2722,7 +2722,7 @@ int submit_bh(int rw, struct buffer_head * bh)
 	bio->bi_end_io = end_bio_bh_io_sync;
 	bio->bi_private = bh;
 
-	return submit_bio(rw, bio);
+	submit_bio(rw, bio);
 }
 
 /**
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 110584f07883..afe2d3ad1e4e 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -177,7 +177,7 @@ void free_buffer_head(struct buffer_head * bh);
 void FASTCALL(unlock_buffer(struct buffer_head *bh));
 void ll_rw_block(int, int, struct buffer_head * bh[]);
 void sync_dirty_buffer(struct buffer_head *bh);
-int submit_bh(int, struct buffer_head *);
+void submit_bh(int, struct buffer_head *);
 void write_boundary_block(struct block_device *bdev,
 			sector_t bblock, unsigned blocksize);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c9fb9bbe154a..1467a19015e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1359,7 +1359,7 @@ extern struct file * get_empty_filp(void);
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);
 struct bio;
-extern int submit_bio(int, struct bio *);
+extern void submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
 extern int set_blocksize(struct block_device *, int);
 extern int sb_set_blocksize(struct super_block *, int);
-- 
cgit v1.2.3


From 4f990f49474ef93e2b47f451cf289c9e30234e0f Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 17 Apr 2004 03:27:28 -0700
Subject: [PATCH] remove buffer_error()

From: Jeff Garzik <jgarzik@pobox.com>

It was debug code, no longer required.
---
 fs/buffer.c                 | 100 ++++----------------------------------------
 fs/ext3/inode.c             |   2 -
 fs/mpage.c                  |   3 +-
 fs/ntfs/aops.c              |   2 -
 fs/reiserfs/inode.c         |   5 ---
 include/linux/buffer_head.h |   7 ----
 6 files changed, 8 insertions(+), 111 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index b06f2059a485..21b8ae31e827 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -51,25 +51,6 @@ static struct bh_wait_queue_head {
 	wait_queue_head_t wqh;
 } ____cacheline_aligned_in_smp bh_wait_queue_heads[1<<BH_WAIT_TABLE_ORDER];
 
-/*
- * Debug/devel support stuff
- */
-
-void __buffer_error(char *file, int line)
-{
-	static int enough;
-
-	if (enough > 10)
-		return;
-	enough++;
-	printk("buffer layer error at %s:%d\n", file, line);
-#ifndef CONFIG_KALLSYMS
-	printk("Pass this trace through ksymoops for reporting\n");
-#endif
-	dump_stack();
-}
-EXPORT_SYMBOL(__buffer_error);
-
 inline void
 init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 {
@@ -99,17 +80,6 @@ EXPORT_SYMBOL(wake_up_buffer);
 
 void fastcall unlock_buffer(struct buffer_head *bh)
 {
-	/*
-	 * unlock_buffer against a zero-count bh is a bug, if the page
-	 * is not locked.  Because then nothing protects the buffer's
-	 * waitqueue, which is used here. (Well.  Other locked buffers
-	 * against the page will pin it.  But complain anyway).
-	 */
-	if (atomic_read(&bh->b_count) == 0 &&
-			!PageLocked(bh->b_page) &&
-			!PageWriteback(bh->b_page))
-		buffer_error();
-
 	clear_buffer_locked(bh);
 	smp_mb__after_clear_bit();
 	wake_up_buffer(bh);
@@ -125,10 +95,6 @@ void __wait_on_buffer(struct buffer_head * bh)
 	wait_queue_head_t *wqh = bh_waitq_head(bh);
 	DEFINE_WAIT(wait);
 
-	if (atomic_read(&bh->b_count) == 0 &&
-			(!bh->b_page || !PageLocked(bh->b_page)))
-		buffer_error();
-
 	do {
 		prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
 		if (buffer_locked(bh)) {
@@ -146,8 +112,6 @@ void __wait_on_buffer(struct buffer_head * bh)
 static void
 __set_page_buffers(struct page *page, struct buffer_head *head)
 {
-	if (page_has_buffers(page))
-		buffer_error();
 	page_cache_get(page);
 	SetPagePrivate(page);
 	page->private = (unsigned long)head;
@@ -433,10 +397,12 @@ __find_get_block_slow(struct block_device *bdev, sector_t block, int unused)
 		}
 		bh = bh->b_this_page;
 	} while (bh != head);
-	buffer_error();
-	printk("block=%llu, b_blocknr=%llu\n",
+
+	printk("__find_get_block_slow() failed. "
+		"block=%llu, b_blocknr=%llu\n",
 		(unsigned long long)block, (unsigned long long)bh->b_blocknr);
 	printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size);
+	printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits);
 out_unlock:
 	spin_unlock(&bd_mapping->private_lock);
 	page_cache_release(page);
@@ -847,10 +813,7 @@ int __set_page_dirty_buffers(struct page *page)
 		struct buffer_head *bh = head;
 
 		do {
-			if (buffer_uptodate(bh))
-				set_buffer_dirty(bh);
-			else
-				buffer_error();
+			set_buffer_dirty(bh);
 			bh = bh->b_this_page;
 		} while (bh != head);
 	}
@@ -1151,7 +1114,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	return page;
 
 failed:
-	buffer_error();
+	BUG();
 	unlock_page(page);
 	page_cache_release(page);
 	return NULL;
@@ -1247,8 +1210,6 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
  */
 void fastcall mark_buffer_dirty(struct buffer_head *bh)
 {
-	if (!buffer_uptodate(bh))
-		buffer_error();
 	if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
 		__set_page_dirty_nobuffers(bh->b_page);
 }
@@ -1267,7 +1228,7 @@ void __brelse(struct buffer_head * buf)
 		return;
 	}
 	printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n");
-	buffer_error();		/* For the stack backtrace */
+	WARN_ON(1);
 }
 
 /*
@@ -1294,8 +1255,6 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh)
 		unlock_buffer(bh);
 		return bh;
 	} else {
-		if (buffer_dirty(bh))
-			buffer_error();
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
 		submit_bh(READ, bh);
@@ -1686,10 +1645,6 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 
 	old_bh = __find_get_block_slow(bdev, block, 0);
 	if (old_bh) {
-#if 0	/* This happens.  Later. */
-		if (buffer_dirty(old_bh))
-			buffer_error();
-#endif
 		clear_buffer_dirty(old_bh);
 		wait_on_buffer(old_bh);
 		clear_buffer_req(old_bh);
@@ -1737,8 +1692,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
 
 	if (!page_has_buffers(page)) {
-		if (!PageUptodate(page))
-			buffer_error();
 		create_empty_buffers(page, 1 << inode->i_blkbits,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
 	}
@@ -1767,9 +1720,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 			 * mapped buffers outside i_size will occur, because
 			 * this page can be outside i_size when there is a
 			 * truncate in progress.
-			 *
-			 * if (buffer_mapped(bh))
-			 *	buffer_error();
 			 */
 			/*
 			 * The buffer was zeroed by block_write_full_page()
@@ -1777,8 +1727,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 			clear_buffer_dirty(bh);
 			set_buffer_uptodate(bh);
 		} else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
-			if (buffer_new(bh))
-				buffer_error();
 			err = get_block(inode, block, bh, 1);
 			if (err)
 				goto recover;
@@ -1811,8 +1759,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 			continue;
 		}
 		if (test_clear_buffer_dirty(bh)) {
-			if (!buffer_uptodate(bh))
-				buffer_error();
 			mark_buffer_async_write(bh);
 		} else {
 			unlock_buffer(bh);
@@ -1942,8 +1888,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 				unmap_underlying_metadata(bh->b_bdev,
 							bh->b_blocknr);
 				if (PageUptodate(page)) {
-					if (!buffer_mapped(bh))
-						buffer_error();
 					set_buffer_uptodate(bh);
 					continue;
 				}
@@ -2001,8 +1945,6 @@ out:
 			void *kaddr;
 
 			clear_buffer_new(bh);
-			if (buffer_uptodate(bh))
-				buffer_error();
 			kaddr = kmap_atomic(page, KM_USER0);
 			memset(kaddr+block_start, 0, bh->b_size);
 			kunmap_atomic(kaddr, KM_USER0);
@@ -2068,8 +2010,6 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 
 	if (!PageLocked(page))
 		PAGE_BUG(page);
-	if (PageUptodate(page))
-		buffer_error();
 	blocksize = 1 << inode->i_blkbits;
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
@@ -2692,13 +2632,6 @@ void submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(!buffer_mapped(bh));
 	BUG_ON(!bh->b_end_io);
 
-	if ((rw == READ || rw == READA) && buffer_uptodate(bh))
-		buffer_error();
-	if (rw == WRITE && !buffer_uptodate(bh))
-		buffer_error();
-	if (rw == READ && buffer_dirty(bh))
-		buffer_error();
-
 	/* Only clear out a write error when rewriting */
 	if (test_set_buffer_req(bh) && rw == WRITE)
 		clear_buffer_write_io_error(bh);
@@ -2797,21 +2730,6 @@ void sync_dirty_buffer(struct buffer_head *bh)
 	}
 }
 
-/*
- * Sanity checks for try_to_free_buffers.
- */
-static void check_ttfb_buffer(struct page *page, struct buffer_head *bh)
-{
-	if (!buffer_uptodate(bh) && !buffer_req(bh)) {
-		if (PageUptodate(page) && page->mapping
-			&& buffer_mapped(bh)	/* discard_buffer */
-			&& S_ISBLK(page->mapping->host->i_mode))
-		{
-			buffer_error();
-		}
-	}
-}
-
 /*
  * try_to_free_buffers() checks if all the buffers on this particular page
  * are unused, and releases them if so.
@@ -2847,7 +2765,6 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
 
 	bh = head;
 	do {
-		check_ttfb_buffer(page, bh);
 		if (buffer_write_io_error(bh))
 			set_bit(AS_EIO, &page->mapping->flags);
 		if (buffer_busy(bh))
@@ -2857,9 +2774,6 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
 		bh = bh->b_this_page;
 	} while (bh != head);
 
-	if (!was_uptodate && PageUptodate(page) && !PageError(page))
-		buffer_error();
-
 	do {
 		struct buffer_head *next = bh->b_this_page;
 
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f5333b88376b..929b643115b8 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1358,8 +1358,6 @@ static int ext3_ordered_writepage(struct page *page,
 	}
 
 	if (!page_has_buffers(page)) {
-		if (!PageUptodate(page))
-			buffer_error();
 		create_empty_buffers(page, inode->i_sb->s_blocksize,
 				(1 << BH_Dirty)|(1 << BH_Uptodate));
 	}
diff --git a/fs/mpage.c b/fs/mpage.c
index 1f7d045fa905..a22fc8f0ba9b 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -485,8 +485,7 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
 			break;
 		block_in_file++;
 	}
-	if (page_block == 0)
-		buffer_error();
+	BUG_ON(page_block == 0);
 
 	first_unmapped = page_block;
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index bb048a75318d..3f4df6ddbe1c 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1340,8 +1340,6 @@ err_out:
 			void *kaddr;
 
 			clear_buffer_new(bh);
-			if (buffer_uptodate(bh))
-				buffer_error();
 			kaddr = kmap_atomic(page, KM_USER0);
 			memset(kaddr + block_start, 0, bh->b_size);
 			kunmap_atomic(kaddr, KM_USER0);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6df304c6b1cc..308ecf13cc0c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1925,7 +1925,6 @@ static int map_block_for_writepage(struct inode *inode,
     th.t_trans_id = 0;
 
     if (!buffer_uptodate(bh_result)) {
-        buffer_error();
 	return -EIO;
     }
 
@@ -2057,8 +2056,6 @@ static int reiserfs_write_full_page(struct page *page, struct writeback_control
      * in the BH_Uptodate is just a sanity check.
      */
     if (!page_has_buffers(page)) {
-	if (!PageUptodate(page))
-	    buffer_error();
 	create_empty_buffers(page, inode->i_sb->s_blocksize, 
 	                    (1 << BH_Dirty) | (1 << BH_Uptodate));
     }
@@ -2120,8 +2117,6 @@ static int reiserfs_write_full_page(struct page *page, struct writeback_control
 	    }
 	}
 	if (test_clear_buffer_dirty(bh)) {
-	    if (!buffer_uptodate(bh))
-		buffer_error();
 	    mark_buffer_async_write(bh);
 	} else {
 	    unlock_buffer(bh);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index afe2d3ad1e4e..ebe0b1221579 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -61,13 +61,6 @@ struct buffer_head {
 	struct list_head b_assoc_buffers; /* associated with another mapping */
 };
 
-/*
- * Debug
- */
-
-void __buffer_error(char *file, int line);
-#define buffer_error() __buffer_error(__FILE__, __LINE__)
-
 /*
  * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
  * and buffer_foo() functions.
-- 
cgit v1.2.3


From c4373a9a8415758b91a004293643d60e698d347c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 17 Apr 2004 03:28:02 -0700
Subject: [PATCH] Add "commit=0" to reiserfs

From: Bart Samwel <bart@samwel.tk>

Add support for the value 0 to the commit option of reiserfs.  A value of 0
means "restore to the default value".  For the maximum commit age, this
default value is normally read from the journal; this patch adds an extra
variable to cache the default value for the maximum commit age.
---
 fs/reiserfs/journal.c          |  1 +
 fs/reiserfs/super.c            | 16 ++++++++++------
 include/linux/reiserfs_fs_sb.h |  2 ++
 3 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 96398695fa10..c1c64b5689eb 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2308,6 +2308,7 @@ int journal_init(struct super_block *p_s_sb, const char * j_dev_name, int old_fo
       SB_JOURNAL_MAX_TRANS_AGE(p_s_sb) = commit_max_age;
   } else {
       SB_JOURNAL_MAX_COMMIT_AGE(p_s_sb) = le32_to_cpu (jh->jh_journal.jp_journal_max_commit_age);
+      SB_JOURNAL_DEFAULT_MAX_COMMIT_AGE(p_s_sb) = SB_JOURNAL_MAX_COMMIT_AGE(p_s_sb);
       SB_JOURNAL_MAX_TRANS_AGE(p_s_sb)  = JOURNAL_MAX_TRANS_AGE;
   }
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index fbd4d224b69b..f2233df104a6 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -709,13 +709,11 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 		char *p = 0;
 		int val = simple_strtoul (arg, &p, 0);
 		/* commit=NNN (time in seconds) */
-		if ( *p != '\0' || val == 0) {
+		if ( *p != '\0' || val < 0) {
 			printk ("reiserfs_parse_options: bad value %s\n", arg);
 			return 0;
 		}
-		if ( val > 0 ) {
-			*commit_max_age = val;
-		}
+		*commit_max_age = val;
 	}
 
 	if ( c == 'w' ) {
@@ -821,8 +819,14 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a
   REISERFS_SB(s)->s_mount_opt = (REISERFS_SB(s)->s_mount_opt & ~safe_mask) |  (mount_options & safe_mask);
 
   if(commit_max_age != 0) {
-	  SB_JOURNAL_MAX_COMMIT_AGE(s) = commit_max_age;
-	  SB_JOURNAL_MAX_TRANS_AGE(s) = commit_max_age;
+    SB_JOURNAL_MAX_COMMIT_AGE(s) = commit_max_age;
+    SB_JOURNAL_MAX_TRANS_AGE(s) = commit_max_age;
+  }
+  else
+  {
+    /* 0 means restore defaults. */
+    SB_JOURNAL_MAX_COMMIT_AGE(s) = SB_JOURNAL_DEFAULT_MAX_COMMIT_AGE(s);
+    SB_JOURNAL_MAX_TRANS_AGE(s) = JOURNAL_MAX_TRANS_AGE;
   }
 
   if(blocks) {
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 3248dcf369f2..9fa2813c2e69 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -208,6 +208,7 @@ struct reiserfs_journal {
   unsigned int s_journal_trans_max ;           /* max number of blocks in a transaction.  */
   unsigned int s_journal_max_batch ;           /* max number of blocks to batch into a trans */
   unsigned int s_journal_max_commit_age ;      /* in seconds, how old can an async commit be */
+  unsigned int s_journal_default_max_commit_age ; /* the default for the max commit age */
   unsigned int s_journal_max_trans_age ;       /* in seconds, how old can a transaction be */  
 
   struct reiserfs_journal_cnode *j_cnode_free_list ;
@@ -481,6 +482,7 @@ int reiserfs_resize(struct super_block *, unsigned long) ;
 #define SB_JOURNAL_TRANS_MAX(s)      (SB_JOURNAL(s)->s_journal_trans_max)
 #define SB_JOURNAL_MAX_BATCH(s)      (SB_JOURNAL(s)->s_journal_max_batch)
 #define SB_JOURNAL_MAX_COMMIT_AGE(s) (SB_JOURNAL(s)->s_journal_max_commit_age)
+#define SB_JOURNAL_DEFAULT_MAX_COMMIT_AGE(s) (SB_JOURNAL(s)->s_journal_default_max_commit_age)
 #define SB_JOURNAL_MAX_TRANS_AGE(s)  (SB_JOURNAL(s)->s_journal_max_trans_age)
 
 /* A safe version of the "bdevname", which returns the "s_id" field of
-- 
cgit v1.2.3


From 3ba9ac07443fbcf68424a12daeadae36d2bc8b23 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@parcelfarce.linux.theplanet.co.uk>
Date: Sat, 17 Apr 2004 03:45:53 -0700
Subject: [PATCH] Remove unused 'kobject' from superblock

The field in question is
  a) unused
  b) damn next to impossible to use correctly, due to struct super_block
     lifetime and locking rules.
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1467a19015e0..edaaaf6ce341 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -751,7 +751,6 @@ struct super_block {
 
 	char s_id[32];				/* Informational name */
 
-	struct kobject           kobj;          /* anchor for sysfs */
 	void 			*s_fs_info;	/* Filesystem private info */
 
 	/*
-- 
cgit v1.2.3


From e4cf8264fa0bbc0b113442df03ceb9606d2ec428 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 17 Apr 2004 03:50:14 -0700
Subject: [PATCH] Fix vma corruption

It occurred to me that if vma and new_vma are one and the same, then
vma_relink_file will not do a good job of linking it after itself - in
that pretty unlikely case when move_page_tables fails.

And more generally, whenever copy_vma's vma_merge succeeds, we have no
guarantee that old vma comes before new_vma in the i_mmap lists, as we
need to satisfy Rajesh's point: that ordering is only guaranteed in the
newly allocated case.

We have to abandon the ordering method when/if we move from lists to
prio_trees, so this patch switches to the less glamorous use of
i_shared_sem exclusion, as in my prio_tree mremap.
---
 include/linux/mm.h |  3 +--
 mm/mmap.c          | 34 +++++++++++-----------------------
 mm/mremap.c        | 22 ++++++++++++++--------
 3 files changed, 26 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 14dba1b26016..4fd3c76ac05f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -526,9 +526,8 @@ extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
-extern struct vm_area_struct *copy_vma(struct vm_area_struct *,
+extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	unsigned long addr, unsigned long len, unsigned long pgoff);
-extern void vma_relink_file(struct vm_area_struct *, struct vm_area_struct *);
 extern void exit_mmap(struct mm_struct *);
 
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/mm/mmap.c b/mm/mmap.c
index eed4e083bca1..94ad97fb2b04 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1498,9 +1498,11 @@ void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
  * Copy the vma structure to a new location in the same mm,
  * prior to moving page table entries, to effect an mremap move.
  */
-struct vm_area_struct *copy_vma(struct vm_area_struct *vma,
+struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	unsigned long addr, unsigned long len, unsigned long pgoff)
 {
+	struct vm_area_struct *vma = *vmap;
+	unsigned long vma_start = vma->vm_start;
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
@@ -1508,7 +1510,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct *vma,
 	find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
 	new_vma = vma_merge(mm, prev, rb_parent, addr, addr + len,
 			vma->vm_flags, vma->vm_file, pgoff);
-	if (!new_vma) {
+	if (new_vma) {
+		/*
+		 * Source vma may have been merged into new_vma
+		 */
+		if (vma_start >= new_vma->vm_start &&
+		    vma_start < new_vma->vm_end)
+			*vmap = new_vma;
+	} else {
 		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 		if (new_vma) {
 			*new_vma = *vma;
@@ -1525,24 +1534,3 @@ struct vm_area_struct *copy_vma(struct vm_area_struct *vma,
 	}
 	return new_vma;
 }
-
-/*
- * Position vma after prev in shared file list:
- * for mremap move error recovery racing against vmtruncate.
- */
-void vma_relink_file(struct vm_area_struct *vma, struct vm_area_struct *prev)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	struct address_space *mapping;
-
-	if (vma->vm_file) {
-		mapping = vma->vm_file->f_mapping;
-		if (mapping) {
-			down(&mapping->i_shared_sem);
-			spin_lock(&mm->page_table_lock);
-			list_move(&vma->shared, &prev->shared);
-			spin_unlock(&mm->page_table_lock);
-			up(&mapping->i_shared_sem);
-		}
-	}
-}
diff --git a/mm/mremap.c b/mm/mremap.c
index c355d4da4afe..4dc19b415000 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -169,6 +169,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		unsigned long new_len, unsigned long new_addr)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct address_space *mapping = NULL;
 	struct vm_area_struct *new_vma;
 	unsigned long vm_flags = vma->vm_flags;
 	unsigned long new_pgoff;
@@ -184,30 +185,35 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
-	new_vma = copy_vma(vma, new_addr, new_len, new_pgoff);
+	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
 	if (!new_vma)
 		return -ENOMEM;
 
+	if (vma->vm_file) {
+		/*
+		 * Subtle point from Rajesh Venkatasubramanian: before
+		 * moving file-based ptes, we must lock vmtruncate out,
+		 * since it might clean the dst vma before the src vma,
+		 * and we propagate stale pages into the dst afterward.
+		 */
+		mapping = vma->vm_file->f_mapping;
+		down(&mapping->i_shared_sem);
+	}
 	moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
 	if (moved_len < old_len) {
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
-		 *
-		 * Subtle point from Rajesh Venkatasubramanian: before
-		 * moving file-based ptes, move new_vma before old vma
-		 * in the i_mmap or i_mmap_shared list, so when racing
-		 * against vmtruncate we cannot propagate pages to be
-		 * truncated back from new_vma into just cleaned old.
 		 */
-		vma_relink_file(vma, new_vma);
 		move_page_tables(new_vma, old_addr, new_addr, moved_len);
 		vma = new_vma;
 		old_len = new_len;
 		old_addr = new_addr;
 		new_addr = -ENOMEM;
 	}
+	if (mapping)
+		up(&mapping->i_shared_sem);
 
 	/* Conceal VM_ACCOUNT so old reservation is not undone */
 	if (vm_flags & VM_ACCOUNT) {
-- 
cgit v1.2.3