From 89c07d608d6f5c4a698b25410b5dff3c751fbb16 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@kleikamp.austin.ibm.com>
Date: Wed, 9 Oct 2002 05:37:14 -0500
Subject: JFS: change name of get_index() to read_index()

get_index conflicts with a symbol in a mips include file.
---
 fs/jfs/jfs_dtree.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 72b3148cb038..df2fa4d8d0fd 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -490,11 +490,11 @@ static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
 }
 
 /*
- *	get_index()
+ *	read_index()
  *
  *	reads a directory table slot
  */
-static int get_index(struct inode *ip, u32 index,
+static int read_index(struct inode *ip, u32 index,
 		     struct dir_table_slot * dirtab_slot)
 {
 	struct metapage *mp = 0;
@@ -2978,7 +2978,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 				return 0;
 			}
 		      repeat:
-			rc = get_index(ip, dir_index, &dirtab_slot);
+			rc = read_index(ip, dir_index, &dirtab_slot);
 			if (rc) {
 				filp->f_pos = DIREND;
 				return rc;
-- 
cgit v1.2.3


From 1c3d71d920c95ce32c688ad6c5bf9ad399b9b313 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 01:44:18 +0200
Subject: XFS: More mount cleanups

Modid: 2.5.x-xfs:slinx:128571a
---
 fs/xfs/linux/xfs_super.c          |  49 ------------
 fs/xfs/linux/xfs_super.h          |  10 ---
 fs/xfs/pagebuf/page_buf.h         |  13 ----
 fs/xfs/pagebuf/page_buf_locking.c |  81 -------------------
 fs/xfs/xfs_mount.c                |  83 +++++++++++++++++++-
 fs/xfs/xfs_mount.h                |   5 ++
 fs/xfs/xfs_vfsops.c               | 158 +++++++++++++++-----------------------
 7 files changed, 149 insertions(+), 250 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c
index 2dfaf44b0f7e..7d99bf82cd8b 100644
--- a/fs/xfs/linux/xfs_super.c
+++ b/fs/xfs/linux/xfs_super.c
@@ -264,55 +264,6 @@ printk("XFS: osyncisdsync is now the default, and will soon be deprecated.\n");
 	return 0;
 }
 
-/*
- * Convert one device special file to a dev_t.
- * Helper routine, used only by spectodevs below.
- */
-STATIC int
-spectodev(
-	const char		*name,
-	const char		*id,
-	dev_t			*dev)
-{
-	struct nameidata	nd;
-	int			error;
-
-	error = path_lookup(name, LOOKUP_FOLLOW, &nd);
-	if (error)
-		return error;
-
-	*dev = kdev_t_to_nr(nd.dentry->d_inode->i_rdev);
-	path_release(&nd);
-	return 0;
-}
-
-/*
- * Convert device special files to dev_t for data, log, realtime.
- */
-int
-spectodevs(
-	struct super_block	*sb,
-	struct xfs_mount_args	*args,
-	dev_t			*ddevp,
-	dev_t			*logdevp,
-	dev_t			*rtdevp)
-{
-	int			rval = 0;
-
-	*ddevp = sb->s_dev;
-
-	if (args->logname[0])
-		rval = spectodev(args->logname, "log", logdevp);
-	else
-		*logdevp = sb->s_dev;
-
-	if (args->rtname[0] && !rval)
-		rval = spectodev(args->rtname, "realtime", rtdevp);
-	else
-		*rtdevp = 0;
-	return rval;
-}
-
 
 STATIC kmem_cache_t * linvfs_inode_cachep;
 
diff --git a/fs/xfs/linux/xfs_super.h b/fs/xfs/linux/xfs_super.h
index e783163a2300..315910498a0c 100644
--- a/fs/xfs/linux/xfs_super.h
+++ b/fs/xfs/linux/xfs_super.h
@@ -80,18 +80,8 @@
 	((s)->s_fs_info = vfsp)
 
 
-struct xfs_mount_args;
-
 extern void
 linvfs_set_inode_ops(
 	struct inode	*inode);
 
-extern int
-spectodevs(
-	struct super_block *sb,
-	struct xfs_mount_args *args,
-	dev_t		*ddevp,
-	dev_t		*logdevp,
-	dev_t		*rtdevp);
-
 #endif	/* __XFS_SUPER_H__ */
diff --git a/fs/xfs/pagebuf/page_buf.h b/fs/xfs/pagebuf/page_buf.h
index 18e27035cf72..6513fea00ded 100644
--- a/fs/xfs/pagebuf/page_buf.h
+++ b/fs/xfs/pagebuf/page_buf.h
@@ -145,7 +145,6 @@ typedef struct pb_target {
 	struct block_device	*pbr_bdev;
 	struct address_space	*pbr_mapping;
 	unsigned int		pbr_blocksize;
-	unsigned int		pbr_blocksize_bits;
 } pb_target_t;
 
 /*
@@ -303,18 +302,6 @@ extern int pagebuf_lock_value(		/* return count on lock		*/
 extern int pagebuf_lock(		/* lock buffer			*/
 		page_buf_t *);		/* buffer to lock		*/
 
-extern void pagebuf_lock_disable(	/* disable buffer locking	*/
-		struct pb_target *,	/* inode for buffers		*/
-		int);			/* do blkdev_put?		*/
-
-extern struct pb_target *pagebuf_lock_enable(
-		dev_t,
-		int);			/* do blkdev_get?		*/
-
-extern void pagebuf_target_blocksize(
-		pb_target_t *,
-		unsigned int);		/* block size			*/
-
 extern void pagebuf_target_clear(struct pb_target *);
 
 extern void pagebuf_unlock(		/* unlock buffer		*/
diff --git a/fs/xfs/pagebuf/page_buf_locking.c b/fs/xfs/pagebuf/page_buf_locking.c
index 6be04596ec11..ecabe0f3c2c2 100644
--- a/fs/xfs/pagebuf/page_buf_locking.c
+++ b/fs/xfs/pagebuf/page_buf_locking.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- * Portions Copyright (c) 2002 Christoph Hellwig.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -58,10 +57,6 @@
 
 #include "page_buf_internal.h"
 
-#ifndef EVMS_MAJOR
-#define EVMS_MAJOR      117
-#endif
-
 /*
  *	pagebuf_cond_lock
  *
@@ -126,82 +121,6 @@ pagebuf_lock(
 	return 0;
 }
 
-/*
- *	pagebuf_lock_disable
- *
- *	pagebuf_lock_disable disables buffer object locking for an inode.
- *	remove_super() does a blkdev_put for us on the data device, hence
- * 	the do_blkdev_put argument.
- */
-void
-pagebuf_lock_disable(
-	pb_target_t		*target,
-	int			do_blkdev_put)
-{
-	pagebuf_delwri_flush(target, PBDF_WAIT, NULL);
-	if (do_blkdev_put)
-		blkdev_put(target->pbr_bdev, BDEV_FS);
-	kfree(target);
-}
-
-/*
- *	pagebuf_lock_enable
- *
- *	get_sb_bdev() does a blkdev_get for us on the data device, hence
- *	the do_blkdev_get argument.
- */
-pb_target_t *
-pagebuf_lock_enable(
-	dev_t			dev,
-	int			do_blkdev_get)
-{
-	struct block_device	*bdev;
-	pb_target_t		*target;
-	int			error = -ENOMEM;
-
-	target = kmalloc(sizeof(pb_target_t), GFP_KERNEL);
-	if (unlikely(!target))
-		return ERR_PTR(error);
-
-	bdev = bdget(dev);
-	if (unlikely(!bdev))
-		goto fail;
-
-	if (do_blkdev_get) {
-		error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_FS);
-		if (unlikely(error))
-			goto fail;
-	}
-
-	target->pbr_dev = dev;
-	target->pbr_bdev = bdev;
-	target->pbr_mapping = bdev->bd_inode->i_mapping;
-
-	pagebuf_target_blocksize(target, PAGE_CACHE_SIZE);
-	
-	if ((MAJOR(dev) == MD_MAJOR) || (MAJOR(dev) == EVMS_MAJOR))
-		target->pbr_flags = PBR_ALIGNED_ONLY;
-	else if (MAJOR(dev) == LVM_BLK_MAJOR)
-		target->pbr_flags = PBR_SECTOR_ONLY;
-	else
-		target->pbr_flags = 0;
-
-	return target;
-
-fail:
-	kfree(target);
-	return ERR_PTR(error);
-}
-
-void
-pagebuf_target_blocksize(
-	pb_target_t		*target,
-	unsigned int		blocksize)
-{
-	target->pbr_blocksize = blocksize;
-	target->pbr_blocksize_bits = ffs(blocksize) - 1;
-}
-
 void
 pagebuf_target_clear(
 	pb_target_t		*target)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 13c88ba8039e..4dccca3c8b34 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -31,6 +31,13 @@
  */
 
 #include <xfs.h>
+#include <linux/major.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+
+#ifndef EVMS_MAJOR
+#define EVMS_MAJOR	117
+#endif
 
 STATIC void	xfs_mount_reset_sbqflags(xfs_mount_t *);
 STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
@@ -1149,15 +1156,17 @@ xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
 	int		have_logdev = (mp->m_logdev_targp != mp->m_ddev_targp);
 
 	if (mp->m_ddev_targp) {
-		pagebuf_lock_disable(mp->m_ddev_targp, 0);
+		xfs_free_buftarg(mp->m_ddev_targp);
 		mp->m_ddev_targp = NULL;
 	}
 	if (mp->m_rtdev_targp) {
-		pagebuf_lock_disable(mp->m_rtdev_targp, 1);
+		xfs_blkdev_put(mp->m_rtdev_targp->pbr_bdev);
+		xfs_free_buftarg(mp->m_rtdev_targp);
 		mp->m_rtdev_targp = NULL;
 	}
 	if (mp->m_logdev_targp && have_logdev) {
-		pagebuf_lock_disable(mp->m_logdev_targp, 1);
+		xfs_blkdev_put(mp->m_logdev_targp->pbr_bdev);
+		xfs_free_buftarg(mp->m_logdev_targp);
 		mp->m_logdev_targp = NULL;
 	}
 }
@@ -1725,3 +1734,71 @@ xfs_check_frozen(
 	if (level == XFS_FREEZE_TRANS)
 		atomic_inc(&mp->m_active_trans);
 }
+
+int
+xfs_blkdev_get(
+	const char		*name,
+	struct block_device	**bdevp)
+{
+	struct nameidata	nd;
+	int			error = 0;
+
+	error = path_lookup(name, LOOKUP_FOLLOW, &nd);
+	if (error) {
+		printk("XFS: Invalid device [%s], error=%d\n",
+				name, error);
+		return error;
+	}
+
+	/* I think we actually want bd_acquire here..  --hch */
+	*bdevp = bdget(kdev_t_to_nr(nd.dentry->d_inode->i_rdev));
+	if (*bdevp) {
+		error = blkdev_get(*bdevp, FMODE_READ|FMODE_WRITE, 0, BDEV_FS);
+	} else {
+		error = -ENOMEM;
+	}
+
+	path_release(&nd);
+	return -error;
+}
+
+void
+xfs_blkdev_put(
+	struct block_device	*bdev)
+{
+	blkdev_put(bdev, BDEV_FS);
+}
+
+void
+xfs_free_buftarg(
+	xfs_buftarg_t		*btp)
+{
+	pagebuf_delwri_flush(btp, PBDF_WAIT, NULL);
+	kfree(btp);
+}
+
+xfs_buftarg_t *
+xfs_alloc_buftarg(
+	struct block_device	*bdev)
+{
+	xfs_buftarg_t		*btp;
+
+	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+
+	btp->pbr_dev =  bdev->bd_dev;
+	btp->pbr_bdev = bdev;
+	btp->pbr_mapping = bdev->bd_inode->i_mapping;
+	btp->pbr_blocksize = PAGE_CACHE_SIZE;
+
+	switch (MAJOR(btp->pbr_dev)) {
+	case MD_MAJOR:
+	case EVMS_MAJOR:
+		btp->pbr_flags = PBR_ALIGNED_ONLY;
+		break;
+	case LVM_BLK_MAJOR:
+		btp->pbr_flags = PBR_SECTOR_ONLY;
+		break;
+	}
+
+	return btp;
+}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 51c86fea20c4..a5b4ec193617 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -436,6 +436,11 @@ int		xfs_syncsub(xfs_mount_t *, int, int, int *);
 void		xfs_initialize_perag(xfs_mount_t *, int);
 void		xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t, __int64_t);
 
+int		xfs_blkdev_get(const char *, struct block_device **);
+void		xfs_blkdev_put(struct block_device *);
+struct xfs_buftarg *xfs_alloc_buftarg(struct block_device *);
+void		xfs_free_buftarg(struct xfs_buftarg *);
+
 /*
  * Flags for freeze operations.
  */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index cce4a74d5dfe..297cfe03e845 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -392,144 +392,114 @@ xfs_finish_flags(
 }
 
 /*
- * xfs_cmountfs
+ * xfs_mount
+ *
+ * The file system configurations are:
+ *	(1) device (partition) with data and internal log
+ *	(2) logical volume with data and log subvolumes.
+ *	(3) logical volume with data, log, and realtime subvolumes.
  *
- * This function is the common mount file system function for XFS.
+ * The Linux VFS took care of finding and opening the data volume for
+ * us.  We have to handle the other two (if present) here.
  */
 STATIC int
-xfs_cmountfs(
+xfs_mount(
 	vfs_t			*vfsp,
-	dev_t			ddev,
-	dev_t			logdev,
-	dev_t			rtdev,
-	struct xfs_mount_args	*ap,
-	struct cred		*cr)
+	struct xfs_mount_args	*args,
+	cred_t			*credp)
 {
 	xfs_mount_t		*mp;
+	struct block_device	*ddev, *logdev, *rtdev;
 	int			ronly = (vfsp->vfs_flag & VFS_RDONLY);
 	int			error = 0;
 
-	/*
-	 * Allocate VFS private data (xfs mount structure).
-	 */
-	mp = xfs_mount_init();
-
-	vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
+	ddev = vfsp->vfs_super->s_bdev;
+	logdev = rtdev = NULL;
 
 	/*
-	 * Open data, real time, and log devices now - order is important.
+	 * Open real time and log devices - order is important.
 	 */
-	mp->m_ddev_targp = pagebuf_lock_enable(ddev, 0);
-	if (IS_ERR(mp->m_ddev_targp)) {
-		error = PTR_ERR(mp->m_ddev_targp);
-		goto error2;
+	if (args->logname[0]) {
+		error = xfs_blkdev_get(args->logname, &logdev);
+		if (error)
+			return error;
 	}
-
-	if (rtdev != 0) {
-		mp->m_rtdev_targp = pagebuf_lock_enable(rtdev, 1);
-		if (IS_ERR(mp->m_rtdev_targp)) {
-			error = PTR_ERR(mp->m_rtdev_targp);
-			pagebuf_lock_disable(mp->m_ddev_targp, 0);
-			goto error2;
+	if (args->rtname[0]) {
+		error = xfs_blkdev_get(args->rtname, &rtdev);
+		if (error) {
+			xfs_blkdev_put(logdev);
+			return error;
 		}
 
 		if (rtdev == ddev || rtdev == logdev) {
 			cmn_err(CE_WARN,
 	"XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
-			error = EINVAL;
-			pagebuf_lock_disable(mp->m_ddev_targp, 0);
-			goto error2;
+			xfs_blkdev_put(logdev);
+			xfs_blkdev_put(rtdev);
+			return EINVAL;
 		}
-		
-		/* Set the realtime device's block size */
-		set_blocksize(mp->m_rtdev_targp->pbr_bdev, 512);
 	}
 
-	if (logdev != ddev) {
-		mp->m_logdev_targp = pagebuf_lock_enable(logdev, 1);
-		if (IS_ERR(mp->m_logdev_targp)) {
-			error = PTR_ERR(mp->m_logdev_targp);
-			pagebuf_lock_disable(mp->m_ddev_targp, 1);
-			if (mp->m_rtdev_targp)
-				pagebuf_lock_disable(mp->m_rtdev_targp, 1);
-			goto error2;
-		}
+	/*
+	 * Allocate VFS private data (xfs mount structure).
+	 */
+	mp = xfs_mount_init();
 
-		/* Set the log device's block size */
-		set_blocksize(mp->m_logdev_targp->pbr_bdev, 512);
+	vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
+
+	mp->m_ddev_targp = xfs_alloc_buftarg(ddev);
+	if (rtdev != NULL) {
+		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev);
+		set_blocksize(rtdev, 512);
+	}
+	if (logdev != NULL && logdev != ddev) {
+		mp->m_logdev_targp = xfs_alloc_buftarg(logdev);
+		set_blocksize(logdev, 512);
 	} else {
 		mp->m_logdev_targp = mp->m_ddev_targp;
 	}
 	
-	if ((error = xfs_start_flags(ap, mp, ronly)))
-		goto error3;
+	error = xfs_start_flags(args, mp, ronly);
+	if (error)
+		goto error;
 
-	if ((error = xfs_readsb(mp)))
-		goto error3;
+	error = xfs_readsb(mp);
+	if (error)
+		goto error;
 
-	if ((error = xfs_finish_flags(ap, mp, ronly))) {
+	error = xfs_finish_flags(args, mp, ronly);
+	if (error) {
 		xfs_freesb(mp);
-		goto error3;
+		goto error;
 	}
 
-	pagebuf_target_blocksize(mp->m_ddev_targp, mp->m_sb.sb_blocksize);
-	if (logdev != 0 && logdev != ddev)
-		pagebuf_target_blocksize(mp->m_logdev_targp,
-					mp->m_sb.sb_blocksize);
-	if (rtdev != 0)
-		pagebuf_target_blocksize(mp->m_rtdev_targp,
-					mp->m_sb.sb_blocksize);
+	mp->m_ddev_targp->pbr_blocksize = mp->m_sb.sb_blocksize;
+	if (logdev != 0 && logdev != ddev) {
+		mp->m_logdev_targp->pbr_blocksize = mp->m_sb.sb_blocksize;
+	}
+	if (rtdev != 0) {
+		mp->m_rtdev_targp->pbr_blocksize = mp->m_sb.sb_blocksize;
+	}
 
 	mp->m_cxfstype = XFS_CXFS_NOT;
-	error = xfs_mountfs(vfsp, mp, ddev, 0);
+	error = xfs_mountfs(vfsp, mp, ddev->bd_dev, 0);
 	if (error)
-		goto error3;
+		goto error;
 	return 0;
 
- error3:
-	/* It's impossible to get here before buftargs are filled */
+ error:
 	xfs_binval(mp->m_ddev_targp);
-	pagebuf_lock_disable(mp->m_ddev_targp, 0);
-	if (logdev && logdev != ddev) {
+	if (logdev != NULL && logdev != ddev) {
 		xfs_binval(mp->m_logdev_targp);
-		pagebuf_lock_disable(mp->m_logdev_targp, 1);
 	}
-	if (rtdev != 0) {
+	if (rtdev != NULL) {
 		xfs_binval(mp->m_rtdev_targp);
-		pagebuf_lock_disable(mp->m_rtdev_targp, 1);
-	}
- error2:
-	if (error) {
-		xfs_mount_free(mp, 1);
 	}
+	xfs_unmountfs_close(mp, NULL);
+	xfs_mount_free(mp, 1);
 	return error;
 }
 
-/*
- * xfs_mount
- *
- * The file system configurations are:
- *	(1) device (partition) with data and internal log
- *	(2) logical volume with data and log subvolumes.
- *	(3) logical volume with data, log, and realtime subvolumes.
- */
-STATIC int
-xfs_mount(
-	vfs_t			*vfsp,
-	struct xfs_mount_args	*args,
-	cred_t			*credp)
-{
-	dev_t		ddev;
-	dev_t		logdev;
-	dev_t		rtdev;
-	int		error;
-
-	error = spectodevs(vfsp->vfs_super, args, &ddev, &logdev, &rtdev);
-	if (!error)
-		error = xfs_cmountfs(vfsp, ddev, logdev, rtdev, args, credp);
-	return (error);
-}
-
 /*
  * xfs_ibusy searches for a busy inode in the mounted file system.
  *
-- 
cgit v1.2.3


From f815162c2704dd1fa11f35e71f7adf43f0588b21 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 01:53:08 +0200
Subject: XFS: I/O path cleanups

Modid: 2.5.x-xfs:slinx:128581a
---
 fs/xfs/linux/xfs_aops.c | 228 +++++++++++++++++-------------------------------
 1 file changed, 78 insertions(+), 150 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c
index e749c3c3bbed..2fd598c79e7f 100644
--- a/fs/xfs/linux/xfs_aops.c
+++ b/fs/xfs/linux/xfs_aops.c
@@ -36,7 +36,6 @@
 #include <linux/mpage.h>
 
 
-STATIC int delalloc_convert(struct inode *, struct page *, int, int);
 
 STATIC int
 map_blocks(
@@ -129,83 +128,6 @@ map_buffer_at_offset(
 	clear_buffer_delay(bh);
 }
 
-/*
- * Convert delalloc space to real space, do not flush the
- * data out to disk, that will be done by the caller.
- */
-STATIC int
-release_page(
-	struct page		*page)
-{
-	struct inode		*inode = (struct inode*)page->mapping->host;
-	unsigned long		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
-	int			ret;
-
-	/* Are we off the end of the file ? */
-	if (page->index >= end_index) {
-		unsigned offset = inode->i_size & (PAGE_CACHE_SIZE-1);
-		if ((page->index >= end_index+1) || !offset) {
-			ret =  -EIO;
-			goto out;
-		}
-	}
-
-	ret = delalloc_convert(inode, page, 0, 0);
-
-out:
-	if (ret < 0) {
-		block_invalidatepage(page, 0);
-		ClearPageUptodate(page);
-
-		return 0;
-	}
-
-	return 1;
-}
-
-/*
- * Convert delalloc or unmapped space to real space and flush out
- * to disk.
- */
-STATIC int
-write_full_page(
-	struct page		*page,
-	int			delalloc)
-{
-	struct inode		*inode = (struct inode*)page->mapping->host;
-	unsigned long		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
-	int			ret;
-
-	/* Are we off the end of the file ? */
-	if (page->index >= end_index) {
-		unsigned offset = inode->i_size & (PAGE_CACHE_SIZE-1);
-		if ((page->index >= end_index+1) || !offset) {
-			ret =  -EIO;
-			goto out;
-		}
-	}
-
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-	}
-
-	ret = delalloc_convert(inode, page, 1, delalloc == 0);
-
-out:
-	if (ret < 0) {
-		/*
-		 * If it's delalloc and we have nowhere to put it,
-		 * throw it away.
-		 */
-		if (delalloc)
-			block_invalidatepage(page, 0);
-		ClearPageUptodate(page);
-		unlock_page(page);
-	}
-
-	return ret;
-}
-
 /*
  * Look for a page at index which is unlocked and not mapped
  * yet - clustering for mmap write case.
@@ -347,16 +269,21 @@ submit_page(
 		end_page_writeback(page);
 }
 
-STATIC int
-map_page(
+/*
+ * Allocate & map buffers for page given the extent map. Write it out.
+ * except for the original page of a writepage, this is called on
+ * delalloc pages only, for the original page it is possible that
+ * the page has no mapping at all.
+ */
+STATIC void
+convert_page(
 	struct inode		*inode,
 	struct page		*page,
 	page_buf_bmap_t		*maps,
-	struct buffer_head	*bh_arr[],
 	int			startio,
 	int			all_bh)
 {
-	struct buffer_head	*bh, *head;
+	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
 	page_buf_bmap_t		*mp = maps, *tmp;
 	unsigned long		end, offset, end_index;
 	int			i = 0, index = 0;
@@ -393,32 +320,12 @@ map_page(
 		}
 	} while (i++, (bh = bh->b_this_page) != head);
 
-	return index;
-}
-
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc pages only, for the original page it is possible that
- * the page has no mapping at all.
- */
-STATIC void
-convert_page(
-	struct inode		*inode,
-	struct page		*page,
-	page_buf_bmap_t		*maps,
-	int			startio,
-	int			all_bh)
-{
-	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE];
-	int			cnt;
-
-	cnt = map_page(inode, page, maps, bh_arr, startio, all_bh);
 	if (startio) {
-		submit_page(page, bh_arr, cnt);
+		submit_page(page, bh_arr, index);
 	} else {
 		unlock_page(page);
 	}
+
 	page_cache_release(page);
 }
 
@@ -439,40 +346,47 @@ cluster_write(
 
 	tlast = (mp->pbm_offset + mp->pbm_bsize) >> PAGE_CACHE_SHIFT;
 	for (; tindex < tlast; tindex++) {
-		if (!(page = probe_page(inode, tindex)))
+		page = probe_page(inode, tindex);
+		if (!page)
 			break;
 		convert_page(inode, page, mp, startio, all_bh);
 	}
 }
 
 /*
- * Calling this without allocate_space set means we are being asked to
- * flush a dirty buffer head. When called with async_write set then we
- * are coming from writepage. A writepage call with allocate_space set
- * means we are being asked to write out all of the page which is before
- * EOF and therefore need to allocate space for unmapped portions of the
- * page.
+ * Calling this without startio set means we are being asked to make a dirty
+ * page ready for freeing it's buffers.  When called with startio set then
+ * we are coming from writepage.
  */
 STATIC int
 delalloc_convert(
-	struct inode		*inode,		/* inode containing page */
-	struct page		*page,		/* page to convert - locked */
-	int			startio,	/* start io on the page */
+	struct page		*page,
+	int			startio,
 	int			allocate_space)
 {
-	struct buffer_head	*bh, *head;
-	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE];
+	struct inode		*inode = page->mapping->host;
+	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
 	page_buf_bmap_t		*mp, map;
-	int			i, cnt = 0;
-	int			len, err;
-	unsigned long		p_offset = 0;
-	loff_t			offset;
-	loff_t			end_offset;
+	unsigned long		p_offset = 0, end_index;
+	loff_t			offset, end_offset;
+	int			len, err, i, cnt = 0;
+
+	/* Are we off the end of the file ? */
+	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	if (page->index >= end_index) {
+		unsigned remaining = inode->i_size & (PAGE_CACHE_SIZE-1);
+		if ((page->index >= end_index+1) || !remaining) {
+			return -EIO;
+		}
+	}
 
 	offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
 	end_offset = offset + PAGE_CACHE_SIZE;
 	if (end_offset > inode->i_size)
 		end_offset = inode->i_size;
+	
+	if (startio && !page_has_buffers(page))
+		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
 
 	bh = head = page_buffers(page);
 	mp = NULL;
@@ -491,8 +405,9 @@ delalloc_convert(
 			if (!mp) {
 				err = map_blocks(inode, offset, len, &map,
 						PBF_WRITE|PBF_FILE_ALLOCATE);
-				if (err)
+				if (err) {
 					goto error;
+				}
 				mp = match_offset_to_mapping(page, &map,
 								p_offset);
 			}
@@ -517,8 +432,9 @@ delalloc_convert(
 								bh, head);
 				err = map_blocks(inode, offset, size, &map,
 						PBF_WRITE|PBF_DIRECT);
-				if (err)
+				if (err) {
 					goto error;
+				}
 				mp = match_offset_to_mapping(page, &map,
 								p_offset);
 			}
@@ -544,12 +460,14 @@ next_bh:
 		bh = bh->b_this_page;
 	} while (offset < end_offset);
 
-	if (startio)
+	if (startio) {
 		submit_page(page, bh_arr, cnt);
+	}
 
-	if (mp)
+	if (mp) {
 		cluster_write(inode, page->index + 1, mp,
 				startio, allocate_space);
+	}
 
 	return 0;
 
@@ -557,7 +475,15 @@ error:
 	for (i = 0; i < cnt; i++) {
 		unlock_buffer(bh_arr[i]);
 	}
-
+	
+	/*
+	 * If it's delalloc and we have nowhere to put it,
+	 * throw it away.
+	 */
+	if (!allocate_space) {
+		block_invalidatepage(page, 0);
+	}
+	ClearPageUptodate(page);
 	return err;
 }
 
@@ -745,14 +671,12 @@ count_page_state(
 
 		bh = head = page_buffers(page);
 		do {
-			if (buffer_uptodate(bh) && !buffer_mapped(bh)) {
+			if (buffer_uptodate(bh) && !buffer_mapped(bh))
 				(*nr_unmapped)++;
-				continue;
-			}
-			if (!buffer_delay(bh))
-				continue;
-			(*nr_delalloc)++;
+			else if (buffer_delay(bh))
+				(*nr_delalloc)++;
 		} while ((bh = bh->b_this_page) != head);
+
 		return 1;
 	}
 
@@ -764,20 +688,22 @@ linvfs_writepage(
 	struct page		*page)
 {
 	int			error;
-	int			need_trans;
+	int			need_trans = 1;
 	int			nr_delalloc, nr_unmapped;
 
-	if (count_page_state(page, &nr_delalloc, &nr_unmapped)) {
+	if (count_page_state(page, &nr_delalloc, &nr_unmapped))
 		need_trans = nr_delalloc + nr_unmapped;
-	} else {
-		need_trans = 1;
-	}
 
 	if ((current->flags & (PF_FSTRANS)) && need_trans)
 		goto out_fail;
 
-	error = write_full_page(page, nr_delalloc);
-
+	/*
+	 * Convert delalloc or unmapped space to real space and flush out
+	 * to disk.
+	 */
+	error = delalloc_convert(page, 1, nr_delalloc == 0);
+	if (unlikely(error))
+		unlock_page(page);
 	return error;
 
 out_fail:
@@ -812,24 +738,26 @@ linvfs_release_page(
 	struct page		*page,
 	int			gfp_mask)
 {
-	int			need_trans;
 	int			nr_delalloc, nr_unmapped;
 
 	if (count_page_state(page, &nr_delalloc, &nr_unmapped)) {
-		need_trans = nr_delalloc;
-	} else {
-		need_trans = 0;
-	}
-
-	if (need_trans == 0) {
-		return try_to_free_buffers(page);
-	}
+		if (!nr_delalloc)
+			goto free_buffers;
+	} 
 
 	if (gfp_mask & __GFP_FS) {
-		if (release_page(page) == 0)
-			return try_to_free_buffers(page);
+		/*
+		 * Convert delalloc space to real space, do not flush the
+		 * data out to disk, that will be done by the caller.
+		 */
+		if (delalloc_convert(page, 0, 0) == 0)
+			goto free_buffers;
 	}
+
 	return 0;
+
+free_buffers:
+	return try_to_free_buffers(page);
 }
 
 
-- 
cgit v1.2.3


From 988c52ee1e98999f52b23ad87494f1ffb454972a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 01:54:12 +0200
Subject: XFS: Don't reset blocksize on umount

Modid: 2.5.x-xfs:slinx:128659a
---
 fs/xfs/linux/xfs_super.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c
index 7d99bf82cd8b..ee8218c5eef0 100644
--- a/fs/xfs/linux/xfs_super.c
+++ b/fs/xfs/linux/xfs_super.c
@@ -542,10 +542,6 @@ linvfs_put_super(
 	}
 
 	vfs_deallocate(vfsp);
-
-	/* Reset device block size */
-	sector_size = bdev_hardsect_size(sb->s_bdev);
-	set_blocksize(sb->s_bdev, sector_size);
 }
 
 void
-- 
cgit v1.2.3


From 5423caba90d1ad0b37ef62073d52ccc9fa059369 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 01:56:11 +0200
Subject: XFS: Set inode operations later in xfs_iget_core

Modid: 2.5.x-xfs:slinx:128691a
---
 fs/xfs/xfs_iget.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 7a8b02b39557..d8a2d9f3c3d7 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -118,13 +118,6 @@ xfs_iget_vnode_init(
 {
 	vp->v_vfsp  = XFS_MTOVFS(mp);
 	vp->v_type  = IFTOVT(ip->i_d.di_mode);
-
-	/* If we have a real type for an on-disk inode, we can set ops(&unlock)
-	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
-	 */
-	if (vp->v_type != VNON) {
-		linvfs_set_inode_ops(LINVFS_GET_IP(vp));
-	}
 }
 
 
@@ -159,7 +152,7 @@ xfs_iget_vnode_init(
  * bno -- the block number starting the buffer containing the inode,
  *	  if known (as by bulkstat), else 0.
  */
-int
+STATIC int
 xfs_iget_core(
 	vnode_t		*vp,
 	xfs_mount_t	*mp,
@@ -429,6 +422,14 @@ finish_inode:
 
 	*ipp = ip;
 
+	/*
+	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
+	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
+	 */
+	if (vp->v_type != VNON) {
+		linvfs_set_inode_ops(LINVFS_GET_IP(vp));
+	}
+
 	/* Update the linux inode */
 	error = vn_revalidate(vp, ATTR_COMM|ATTR_LAZY);
 
-- 
cgit v1.2.3


From c76586cc6cdc65e21393dd5948092722b15a9038 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 01:57:25 +0200
Subject: XFS: Handle NULL pagebufs gracefully in pagebuf_geterror

Modid: 2.5.x-xfs:slinx:128787a
---
 fs/xfs/pagebuf/page_buf.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/pagebuf/page_buf.h b/fs/xfs/pagebuf/page_buf.h
index 6513fea00ded..f62d8d419fc6 100644
--- a/fs/xfs/pagebuf/page_buf.h
+++ b/fs/xfs/pagebuf/page_buf.h
@@ -308,8 +308,10 @@ extern void pagebuf_unlock(		/* unlock buffer		*/
 		page_buf_t *);		/* buffer to unlock		*/
 
 /* Buffer Utility Routines */
-
-#define pagebuf_geterror(pb)	((pb)->pb_error)
+static inline int pagebuf_geterror(page_buf_t *pb)
+{
+	return (pb ? pb->pb_error : ENOMEM);
+}
 
 extern void pagebuf_iodone(		/* mark buffer I/O complete	*/
 		page_buf_t *);		/* buffer to mark		*/
-- 
cgit v1.2.3


From f118d8b959062dcc018311c44c3253daa14444c8 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 02:00:14 +0200
Subject: XFS: Allow quota inode creation on a read-only filesystem.

Modid: 2.5.x-xfs:slinx:128905a
---
 fs/xfs/linux/xfs_lrw.c |  9 +++++++++
 fs/xfs/linux/xfs_lrw.h |  1 +
 fs/xfs/xfs_qm.c        | 33 ++++++++++++++++++++++-----------
 3 files changed, 32 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_lrw.c b/fs/xfs/linux/xfs_lrw.c
index c7467e2acce3..faf5fc7ba018 100644
--- a/fs/xfs/linux/xfs_lrw.c
+++ b/fs/xfs/linux/xfs_lrw.c
@@ -1863,3 +1863,12 @@ xfs_quotacheck_read_only(xfs_mount_t *mp)
 		"quotacheck required on readonly filesystem.");
 	return xfs_is_read_only(mp);
 }
+
+int
+xfs_quotaino_create_read_only(xfs_mount_t *mp)
+{
+        cmn_err(CE_NOTE, "XFS: WARNING: "
+                "Quota inode creation required on readonly filesystem.");
+        return xfs_is_read_only(mp);
+}
+
diff --git a/fs/xfs/linux/xfs_lrw.h b/fs/xfs/linux/xfs_lrw.h
index 0ea2cfe9a860..4ea9c9f4bfb0 100644
--- a/fs/xfs/linux/xfs_lrw.h
+++ b/fs/xfs/linux/xfs_lrw.h
@@ -64,6 +64,7 @@ extern ssize_t xfs_write (
 
 extern int xfs_recover_read_only (xlog_t *);
 extern int xfs_quotacheck_read_only (xfs_mount_t *);
+extern int xfs_quotaino_create_read_only (xfs_mount_t *);
 
 extern void XFS_log_write_unmount_ro (bhv_desc_t *);
 
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index db7f44f0eb52..700ecd85cebe 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1998,11 +1998,16 @@ xfs_qm_init_quotainos(
 	int		error;
 	__int64_t	sbflags;
 	uint		flags;
+	int		readonly;
+	vfs_t		*vfsp;
 
 	ASSERT(mp->m_quotainfo);
 	uip = gip = NULL;
+	error = 0;
 	sbflags = 0;
 	flags = 0;
+	vfsp = XFS_MTOVFS(mp);
+	readonly = vfsp->vfs_flag & VFS_RDONLY;
 
 	/*
 	 * Get the uquota and gquota inodes
@@ -2034,38 +2039,44 @@ xfs_qm_init_quotainos(
 	/*
 	 * Create the two inodes, if they don't exist already. The changes
 	 * made above will get added to a transaction and logged in one of
-	 * the qino_alloc calls below.
+	 * the qino_alloc calls below.  If the device is readonly,
+	 * temporarily switch to read-write to do this.
 	 */
+
+	if (readonly &&
+	    ((XFS_IS_UQUOTA_ON(mp) && uip == NULL) || 
+	     (XFS_IS_GQUOTA_ON(mp) && gip == NULL))) {
+		if ((error = xfs_quotaino_create_read_only(mp)))
+			goto error;
+	}
+
 	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
-		if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
-			return XFS_ERROR(EROFS);
 		if ((error = xfs_qm_qino_alloc(mp, &uip,
 					      sbflags | XFS_SB_UQUOTINO,
 					      flags | XFS_QMOPT_UQUOTA)))
-			return XFS_ERROR(error);
+			goto error;
 
 		flags &= ~XFS_QMOPT_SBVERSION;
 	}
 	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
-		if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) {
-			if (uip)
-				VN_RELE(XFS_ITOV(uip));
-			return XFS_ERROR(EROFS);
-		}
 		if ((error = xfs_qm_qino_alloc(mp, &gip,
 					      sbflags | XFS_SB_GQUOTINO,
 					      flags | XFS_QMOPT_GQUOTA))) {
 			if (uip)
 				VN_RELE(XFS_ITOV(uip));
 
-			return XFS_ERROR(error);
+			goto error;
 		}
 	}
 
 	XFS_QI_UQIP(mp) = uip;
 	XFS_QI_GQIP(mp) = gip;
 
-	return (0);
+error:
+	if (readonly)
+		vfsp->vfs_flag |= VFS_RDONLY;
+
+	return XFS_ERROR(error);
 }
 
 
-- 
cgit v1.2.3


From 6ec8464dd82d2ad9a29ae388db27acaf8050930f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 02:06:33 +0200
Subject: XFS: Remove a dead variable.

Modid: 2.5.x-xfs:slinx:128913a
---
 fs/xfs/linux/xfs_super.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c
index ee8218c5eef0..ea5d2c313ccc 100644
--- a/fs/xfs/linux/xfs_super.c
+++ b/fs/xfs/linux/xfs_super.c
@@ -530,9 +530,8 @@ void
 linvfs_put_super(
 	struct super_block	*sb)
 {
-	int			error;
-	int			sector_size;
 	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
+	int			error;
 
 	VFS_DOUNMOUNT(vfsp, 0, NULL, NULL, error);
 	if (error) {
-- 
cgit v1.2.3


From 85493d6937d27318b1a16cb5c0e72841f35fb00b Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 02:40:32 +0200
Subject: XFS: Rework dev_t and linux inode handling

This is a two-fold change. First, it moves the translation between
linux dev_t and kdev_t up the call stack in xfs and makes the bulk
of xfs work in terms of its on-disk dev_t format.  It also cleans
up a few related chunks of code.

The other part of the change reworks how we keep the linux inode
contents and the xfs inode fields in sync. A number of places where
we resynced the two have been removed; these were basically
replicating work done elsewhere in the filesystem.  We now also ensure
that the inode fields are filled in before calling unlock_new_inode -
there used to be a window.

Finally all the code which hooks together the linux inode and the xfs
inode is brought together as a more coherent whole rather than being
scattered around the inode create path. Most calls to revalidate the
linux inode from the xfs inode are removed.

Modid: 2.5.x-xfs:slinx:128899a 10/02/02
---
 fs/xfs/linux/xfs_ioctl.c |  6 -----
 fs/xfs/linux/xfs_iops.c  | 50 +++++---------------------------------
 fs/xfs/linux/xfs_iops.h  |  1 -
 fs/xfs/linux/xfs_super.c | 12 +--------
 fs/xfs/linux/xfs_vfs.h   | 10 ++++++++
 fs/xfs/linux/xfs_vnode.c | 31 +++++++-----------------
 fs/xfs/linux/xfs_vnode.h |  6 ++---
 fs/xfs/xfs_iget.c        | 63 ++++++++++++++++++++----------------------------
 fs/xfs/xfs_inode.c       | 11 +++++----
 fs/xfs/xfs_inode.h       |  4 +--
 fs/xfs/xfs_qm.c          |  2 +-
 fs/xfs/xfs_types.h       | 22 ++++++++---------
 fs/xfs/xfs_utils.c       |  2 +-
 fs/xfs/xfs_utils.h       |  2 +-
 fs/xfs/xfs_vfsops.c      | 39 ++++++++++++++++++++++++++----
 fs/xfs/xfs_vnodeops.c    | 14 +++++------
 16 files changed, 118 insertions(+), 157 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_ioctl.c b/fs/xfs/linux/xfs_ioctl.c
index 5dbf4fd9debf..1a5a26bbc217 100644
--- a/fs/xfs/linux/xfs_ioctl.c
+++ b/fs/xfs/linux/xfs_ioctl.c
@@ -264,12 +264,6 @@ xfs_vget_fsop_handlereq(
 	vpp = XFS_ITOV(ip);
 	inodep = LINVFS_GET_IP(vpp);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	error = linvfs_revalidate_core(inodep, ATTR_COMM);
-	if (error) {
-		iput(inodep);
-		/* this error is (-) but our callers expect + */
-		return XFS_ERROR(-error);
-	}
 
 	*vp = vpp;
 	*inode = inodep;
diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c
index 3beca5b8fd34..be6b8b5306fa 100644
--- a/fs/xfs/linux/xfs_iops.c
+++ b/fs/xfs/linux/xfs_iops.c
@@ -98,7 +98,7 @@ linvfs_mknod(
 
 	switch (mode & S_IFMT) {
 	case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
-		va.va_rdev = rdev;
+		va.va_rdev = XFS_MKDEV(MAJOR(rdev), MINOR(rdev));
 		va.va_mask |= AT_RDEV;
 		/*FALLTHROUGH*/
 	case S_IFREG:
@@ -122,8 +122,6 @@ linvfs_mknod(
 
 		if (S_ISCHR(mode) || S_ISBLK(mode))
 			ip->i_rdev = to_kdev_t(rdev);
-		/* linvfs_revalidate_core returns (-) errors */
-		error = -linvfs_revalidate_core(ip, ATTR_COMM);
 		validate_fields(dir);
 		d_instantiate(dentry, ip);
 		mark_inode_dirty_sync(ip);
@@ -186,7 +184,6 @@ linvfs_lookup(
 			VN_RELE(cvp);
 			return ERR_PTR(-EACCES);
 		}
-		error = -linvfs_revalidate_core(ip, ATTR_COMM);
 	}
 	if (error && (error != ENOENT))
 		return ERR_PTR(-error);
@@ -278,8 +275,6 @@ linvfs_symlink(
 			error = ENOMEM;
 			VN_RELE(cvp);
 		} else {
-			/* linvfs_revalidate_core returns (-) errors */
-			error = -linvfs_revalidate_core(ip, ATTR_COMM);
 			d_instantiate(dentry, ip);
 			validate_fields(dir);
 			mark_inode_dirty_sync(ip);
@@ -441,16 +436,6 @@ linvfs_permission(
  * from the results of a getattr. This gets called out of things
  * like stat.
  */
-int
-linvfs_revalidate_core(
-	struct inode	*inode,
-	int		flags)
-{
-	vnode_t		*vp = LINVFS_GET_VP(inode);
-
-	/* vn_revalidate returns (-) error so this is ok */
-	return vn_revalidate(vp, flags);
-}
 
 STATIC int
 linvfs_getattr(
@@ -463,7 +448,7 @@ linvfs_getattr(
 	int		error = 0;
 
 	if (unlikely(vp->v_flag & VMODIFIED)) {
-		error = linvfs_revalidate_core(inode, 0);
+		error = vn_revalidate(vp);
 	}
 	if (!error)
 		generic_fillattr(inode, stat);
@@ -528,7 +513,7 @@ linvfs_setattr(
 	}
 
 	if (!error) {
-		vn_revalidate(vp, 0);
+		vn_revalidate(vp);
 		mark_inode_dirty_sync(inode);
 	}
 	return error;
@@ -618,30 +603,17 @@ linvfs_setxattr(
 		error = -ENOATTR;
 		p += xfs_namespaces[SYSTEM_NAMES].namelen;
 		if (strcmp(p, POSIXACL_ACCESS) == 0) {
-			if (vp->v_flag & VMODIFIED) {
-				error = linvfs_revalidate_core(inode, 0);
-				if (error)
-					return error;
-			}
 			error = xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
-			if (!error) {
-				VMODIFY(vp);
-				error = linvfs_revalidate_core(inode, 0);
-			}
 		}
 		else if (strcmp(p, POSIXACL_DEFAULT) == 0) {
-			error = linvfs_revalidate_core(inode, 0);
-			if (error)
-				return error;
 			error = xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
-			if (!error) {
-				VMODIFY(vp);
-				error = linvfs_revalidate_core(inode, 0);
-			}
 		}
 		else if (strcmp(p, POSIXCAP) == 0) {
 			error = xfs_cap_vset(vp, data, size);
 		}
+		if (!error) {
+			error = vn_revalidate(vp);
+		}
 		return error;
 	}
 
@@ -689,19 +661,9 @@ linvfs_getxattr(
 		error = -ENOATTR;
 		p += xfs_namespaces[SYSTEM_NAMES].namelen;
 		if (strcmp(p, POSIXACL_ACCESS) == 0) {
-			if (vp->v_flag & VMODIFIED) {
-				error = linvfs_revalidate_core(inode, 0);
-				if (error)
-					return error;
-			}
 			error = xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
 		}
 		else if (strcmp(p, POSIXACL_DEFAULT) == 0) {
-			if (vp->v_flag & VMODIFIED) {
-				error = linvfs_revalidate_core(inode, 0);
-				if (error)
-					return error;
-			}
 			error = xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
 		}
 		else if (strcmp(p, POSIXCAP) == 0) {
diff --git a/fs/xfs/linux/xfs_iops.h b/fs/xfs/linux/xfs_iops.h
index 3c4529374aec..c5ce4a6ea9f9 100644
--- a/fs/xfs/linux/xfs_iops.h
+++ b/fs/xfs/linux/xfs_iops.h
@@ -65,7 +65,6 @@ extern struct file_operations linvfs_dir_operations;
 
 extern struct address_space_operations linvfs_aops;
 
-extern int linvfs_revalidate_core(struct inode *, int);
 extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c
index ea5d2c313ccc..0d2bc0e31728 100644
--- a/fs/xfs/linux/xfs_super.c
+++ b/fs/xfs/linux/xfs_super.c
@@ -390,7 +390,6 @@ linvfs_fill_super(
 		goto fail_unmount;
 
 	ip = LINVFS_GET_IP(rootvp);
-	linvfs_revalidate_core(ip, ATTR_COMM);
 
 	sb->s_root = d_alloc_root(ip);
 	if (!sb->s_root)
@@ -444,12 +443,6 @@ linvfs_set_inode_ops(
 {
 	vnode_t			*vp = LINVFS_GET_VP(inode);
 
-	inode->i_mode = VTTOIF(vp->v_type);
-
-	/* If this isn't a new inode, nothing to do */
-	if (!(inode->i_state & I_NEW))
-		return;
-
 	if (vp->v_type == VNON) {
 		make_bad_inode(inode);
 	} else if (S_ISREG(inode->i_mode)) {
@@ -468,8 +461,6 @@ linvfs_set_inode_ops(
 		init_special_inode(inode, inode->i_mode,
 					kdev_t_to_nr(inode->i_rdev));
 	}
-
-	unlock_new_inode(inode);
 }
 
 /*
@@ -530,8 +521,8 @@ void
 linvfs_put_super(
 	struct super_block	*sb)
 {
-	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
 	int			error;
+	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
 
 	VFS_DOUNMOUNT(vfsp, 0, NULL, NULL, error);
 	if (error) {
@@ -674,7 +665,6 @@ linvfs_get_parent(
 			VN_RELE(cvp);
 			return ERR_PTR(-EACCES);
 		}
-		error = -linvfs_revalidate_core(ip, ATTR_COMM);
 	}
 	if (error)
 		return ERR_PTR(-error);
diff --git a/fs/xfs/linux/xfs_vfs.h b/fs/xfs/linux/xfs_vfs.h
index 0f384eb8220f..381cb9d7c6d4 100644
--- a/fs/xfs/linux/xfs_vfs.h
+++ b/fs/xfs/linux/xfs_vfs.h
@@ -92,6 +92,8 @@ typedef struct vfsops {
 					/* send dmapi mount event */
 	int	(*vfs_dmapi_fsys_vector)(bhv_desc_t *,
 					 struct dm_fcntl_vector *);
+	void	(*vfs_init_vnode)(bhv_desc_t *, struct vnode *,
+					bhv_desc_t *, int);
 	void	(*vfs_force_shutdown)(bhv_desc_t *,
 					int, char *, int);
 } vfsops_t;
@@ -132,6 +134,14 @@ typedef struct vfsops {
 	rv = (*(VFS_FOPS(vfsp)->vfs_vget))((vfsp)->vfs_fbhv, vpp, fidp);  \
 	BHV_READ_UNLOCK(&(vfsp)->vfs_bh); \
 }
+
+#define VFS_INIT_VNODE(vfsp, vp, bhv, unlock) \
+{	\
+	BHV_READ_LOCK(&(vfsp)->vfs_bh); \
+	(*(VFS_FOPS(vfsp)->vfs_init_vnode))((vfsp)->vfs_fbhv, vp, bhv, unlock);\
+	BHV_READ_UNLOCK(&(vfsp)->vfs_bh); \
+}
+
 /* No behavior lock here */
 #define VFS_FORCE_SHUTDOWN(vfsp, flags) \
 	(*(VFS_FOPS(vfsp)->vfs_force_shutdown))((vfsp)->vfs_fbhv, flags, __FILE__, __LINE__);
diff --git a/fs/xfs/linux/xfs_vnode.c b/fs/xfs/linux/xfs_vnode.c
index 0d4cb5ea14eb..9689236c6ae5 100644
--- a/fs/xfs/linux/xfs_vnode.c
+++ b/fs/xfs/linux/xfs_vnode.c
@@ -203,7 +203,7 @@ vn_get(struct vnode *vp, vmap_t *vmap)
  * "revalidate" the linux inode.
  */
 int
-vn_revalidate(struct vnode *vp, int flags)
+vn_revalidate(struct vnode *vp)
 {
 	int		error;
 	struct inode	*inode;
@@ -215,7 +215,7 @@ vn_revalidate(struct vnode *vp, int flags)
 
 	ASSERT(vp->v_bh.bh_first != NULL);
 
-	VOP_GETATTR(vp, &va, flags & ATTR_LAZY, NULL, error);
+	VOP_GETATTR(vp, &va, 0, NULL, error);
 
 	if (! error) {
 		inode = LINVFS_GET_IP(vp);
@@ -225,27 +225,14 @@ vn_revalidate(struct vnode *vp, int flags)
 		inode->i_nlink	    = va.va_nlink;
 		inode->i_uid	    = va.va_uid;
 		inode->i_gid	    = va.va_gid;
-		inode->i_rdev	    = mk_kdev(MAJOR(va.va_rdev),
-						MINOR(va.va_rdev));
-		inode->i_blksize    = PAGE_CACHE_SIZE;
+		inode->i_rdev	    = XFS_DEV_TO_KDEVT(va.va_rdev);
 		inode->i_generation = va.va_gencount;
-		if ((flags & ATTR_COMM) ||
-		    S_ISREG(inode->i_mode) ||
-		    S_ISDIR(inode->i_mode) ||
-		    S_ISLNK(inode->i_mode)) {
-			inode->i_size	    = va.va_size;
-			inode->i_blocks	    = va.va_nblocks;
-			inode->i_atime	    = va.va_atime.tv_sec;
-			inode->i_mtime	    = va.va_mtime.tv_sec;
-			inode->i_ctime	    = va.va_ctime.tv_sec;
-		}
-		if (flags & ATTR_LAZY)
-			vp->v_flag &= ~VMODIFIED;
-		else
-			VUNMODIFY(vp);
-	} else {
-		vn_trace_exit(vp, "vn_revalidate.error",
-					(inst_t *)__return_address);
+		inode->i_size	    = va.va_size;
+		inode->i_blocks	    = va.va_nblocks;
+		inode->i_mtime	    = va.va_mtime.tv_sec;
+		inode->i_ctime	    = va.va_ctime.tv_sec;
+		inode->i_atime	    = va.va_atime.tv_sec;
+		VUNMODIFY(vp);
 	}
 
 	return -error;
diff --git a/fs/xfs/linux/xfs_vnode.h b/fs/xfs/linux/xfs_vnode.h
index bf6025bfe0a4..0cc85e88ca5f 100644
--- a/fs/xfs/linux/xfs_vnode.h
+++ b/fs/xfs/linux/xfs_vnode.h
@@ -528,14 +528,14 @@ typedef struct vattr {
 	mode_t		va_mode;	/* file access mode */
 	uid_t		va_uid;		/* owner user id */
 	gid_t		va_gid;		/* owner group id */
-	dev_t		va_fsid;	/* file system id (dev for now) */
+	xfs_dev_t	va_fsid;	/* file system id (dev for now) */
 	xfs_ino_t	va_nodeid;	/* node id */
 	nlink_t		va_nlink;	/* number of references to file */
 	xfs_off_t	va_size;	/* file size in bytes */
 	timespec_t	va_atime;	/* time of last access */
 	timespec_t	va_mtime;	/* time of last modification */
 	timespec_t	va_ctime;	/* time file ``created'' */
-	dev_t		va_rdev;	/* device the file represents */
+	xfs_dev_t	va_rdev;	/* device the file represents */
 	u_long		va_blksize;	/* fundamental block size */
 	__int64_t	va_nblocks;	/* # of blocks allocated */
 	u_long		va_vcode;	/* version code */
@@ -642,7 +642,7 @@ typedef struct vnode_map {
 				 (vmap).v_ino	 = (ip)->i_ino; }
 extern void	vn_purge(struct vnode *, vmap_t *);
 extern vnode_t	*vn_get(struct vnode *, vmap_t *);
-extern int	vn_revalidate(struct vnode *, int);
+extern int	vn_revalidate(struct vnode *);
 extern void	vn_remove(struct vnode *);
 
 static inline int vn_count(struct vnode *vp)
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index d8a2d9f3c3d7..b4eb0200e7bd 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -31,6 +31,7 @@
  */
 
 #include <xfs.h>
+#include <linux/pagemap.h>
 
 
 /*
@@ -109,17 +110,35 @@ xfs_chash_free(xfs_mount_t *mp)
 	mp->m_chash = NULL;
 }
 
-
-static inline void
-xfs_iget_vnode_init(
+void
+xfs_revalidate_inode(
 	xfs_mount_t	*mp,
 	vnode_t		*vp,
 	xfs_inode_t	*ip)
 {
-	vp->v_vfsp  = XFS_MTOVFS(mp);
-	vp->v_type  = IFTOVT(ip->i_d.di_mode);
-}
+	struct inode	*inode = LINVFS_GET_IP(vp);
 
+	inode->i_mode	= (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type);
+	inode->i_nlink	= ip->i_d.di_nlink;
+	inode->i_uid	= ip->i_d.di_uid;
+	inode->i_gid 	= ip->i_d.di_gid;
+	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+		inode->i_rdev	= NODEV;
+	} else {
+		xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
+		inode->i_rdev	= XFS_DEV_TO_KDEVT(dev);
+	}
+	inode->i_blksize = PAGE_CACHE_SIZE;
+	inode->i_generation = ip->i_d.di_gen;
+	inode->i_size	= ip->i_d.di_size;
+	inode->i_blocks =
+		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+	inode->i_atime	= ip->i_d.di_atime.t_sec;
+	inode->i_mtime	= ip->i_d.di_mtime.t_sec;
+	inode->i_ctime	= ip->i_d.di_ctime.t_sec;
+
+	vp->v_flag &= ~VMODIFIED;
+}
 
 /*
  * Look up an inode by number in the given file system.
@@ -198,16 +217,9 @@ again:
 					goto again;
 				}
 
-				xfs_iget_vnode_init(mp, vp, ip);
-
 				vn_trace_exit(vp, "xfs_iget.alloc",
 					(inst_t *)__return_address);
 
-				bhv_desc_init(&(ip->i_bhv_desc), ip, vp,
-							&xfs_vnodeops);
-				vn_bhv_insert_initial(VN_BHV_HEAD(vp),
-							&(ip->i_bhv_desc));
-
 				XFS_STATS_INC(xfsstats.xs_ig_found);
 
 				read_unlock(&ih->ih_lock);
@@ -240,11 +252,6 @@ again:
 
 			XFS_STATS_INC(xfsstats.xs_ig_found);
 
-			/*
-			 * Make sure the vnode and the inode are hooked up
-			 */
-			xfs_iget_vnode_init(mp, vp, ip);
-
 finish_inode:
 			if (lock_flags != 0) {
 				xfs_ilock(ip, lock_flags);
@@ -281,19 +288,8 @@ finish_inode:
 		return error;
 	}
 
-	/*
-	 * Vnode provided by vn_initialize.
-	 */
-
-	xfs_iget_vnode_init(mp, vp, ip);
-
 	vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);
 
-	if (vp->v_fbhv == NULL) {
-		bhv_desc_init(&(ip->i_bhv_desc), ip, vp, &xfs_vnodeops);
-		vn_bhv_insert_initial(VN_BHV_HEAD(vp), &(ip->i_bhv_desc));
-	}
-
 	xfs_inode_lock_init(ip, vp);
 	xfs_iocore_inode_init(ip);
 
@@ -426,12 +422,7 @@ finish_inode:
 	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
 	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
 	 */
-	if (vp->v_type != VNON) {
-		linvfs_set_inode_ops(LINVFS_GET_IP(vp));
-	}
-
-	/* Update the linux inode */
-	error = vn_revalidate(vp, ATTR_COMM|ATTR_LAZY);
+	VFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1);
 
 	return 0;
 }
@@ -496,7 +487,6 @@ inode_allocate:
 			newnode = (ip->i_d.di_mode == 0);
 			if (newnode)
 				xfs_iocore_inode_reinit(ip);
-			vn_revalidate(vp, ATTR_COMM|ATTR_LAZY);
 			XFS_STATS_INC(xfsstats.xs_ig_found);
 			*ipp = ip;
 			error = 0;
@@ -507,7 +497,6 @@ inode_allocate:
 	return error;
 }
 
-
 /*
  * Do the setup for the various locks within the incore inode.
  */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f7c35e754899..35506332b70e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1015,7 +1015,7 @@ xfs_ialloc(
 	xfs_inode_t	*pip,
 	mode_t		mode,
 	nlink_t		nlink,
-	dev_t		rdev,
+	xfs_dev_t	rdev,
 	cred_t		*cr,
 	xfs_prid_t	prid,
 	int		okalloc,
@@ -1067,9 +1067,6 @@ xfs_ialloc(
 	ip->i_d.di_projid = prid;
 	bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
 
-	/* now that we have a v_type we can set Linux inode ops (& unlock) */
-	linvfs_set_inode_ops(LINVFS_GET_IP(XFS_ITOV(ip)));
-
 	/*
 	 * If the superblock version is up to where we support new format
 	 * inodes and this is currently an old format inode, then change
@@ -1128,7 +1125,7 @@ xfs_ialloc(
 	case IFBLK:
 	case IFSOCK:
 		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
-		ip->i_df.if_u2.if_rdev = IRIX_MKDEV(MAJOR(rdev), MINOR(rdev));
+		ip->i_df.if_u2.if_rdev = rdev;
 		ip->i_df.if_flags = 0;
 		flags |= XFS_ILOG_DEV;
 		break;
@@ -1172,6 +1169,10 @@ xfs_ialloc(
 	 * Log the new values stuffed into the inode.
 	 */
 	xfs_trans_log_inode(tp, ip, flags);
+
+	/* now that we have a v_type we can set Linux inode ops (& unlock) */
+	VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
+
 	*ipp = ip;
 	return 0;
 }
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 308bc90d825f..e3c2977ef18c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -516,7 +516,7 @@ int		xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			  xfs_inode_t **, xfs_daddr_t);
 int		xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
 int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, nlink_t,
-			   dev_t, struct cred *, xfs_prid_t, int,
+			   xfs_dev_t, struct cred *, xfs_prid_t, int,
 			   struct xfs_buf **, boolean_t *, xfs_inode_t **);
 void		xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *, int,
 			   xfs_arch_t);
@@ -550,7 +550,7 @@ void		xfs_lock_inodes(xfs_inode_t **, int, int, uint);
 
 #define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
 
-
+void xfs_revalidate_inode(struct xfs_mount *, vnode_t *vp, xfs_inode_t *);
 
 #ifdef DEBUG
 void		xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 700ecd85cebe..3b87ef6c7d00 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1429,7 +1429,7 @@ xfs_qm_qino_alloc(
 	}
 	bzero(&zerocr, sizeof(zerocr));
 
-	if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, IFREG, 1, mp->m_dev,
+	if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, IFREG, 1, 0,
 				   &zerocr, 0, 1, ip, &committed))) {
 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
 				 XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index aa6fe38dde5e..9bc2d90d426f 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -315,16 +315,16 @@ extern struct xfsstats xfsstats;
 #define MKDEV(major, minor)	makedev(major, minor)
 #endif
 
-#define IRIX_DEV_BITSMAJOR	14
-#define IRIX_DEV_BITSMINOR	18
-#define IRIX_DEV_MAXMAJ		0x1ff
-#define IRIX_DEV_MAXMIN		0x3ffff
-#define IRIX_DEV_MAJOR(dev)	((int)(((unsigned)(dev)>>IRIX_DEV_BITSMINOR) \
-				    & IRIX_DEV_MAXMAJ))
-#define IRIX_DEV_MINOR(dev)	((int)((dev)&IRIX_DEV_MAXMIN))
-#define IRIX_MKDEV(major,minor) ((xfs_dev_t)(((major)<<IRIX_DEV_BITSMINOR) \
-				    | (minor&IRIX_DEV_MAXMIN)))
-
-#define IRIX_DEV_TO_KDEVT(dev)	MKDEV(IRIX_DEV_MAJOR(dev),IRIX_DEV_MINOR(dev))
+#define XFS_DEV_BITSMAJOR	14
+#define XFS_DEV_BITSMINOR	18
+#define XFS_DEV_MAXMAJ		0x1ff
+#define XFS_DEV_MAXMIN		0x3ffff
+#define XFS_DEV_MAJOR(dev)	((int)(((unsigned)(dev)>>XFS_DEV_BITSMINOR) \
+				    & XFS_DEV_MAXMAJ))
+#define XFS_DEV_MINOR(dev)	((int)((dev)&XFS_DEV_MAXMIN))
+#define XFS_MKDEV(major,minor) ((xfs_dev_t)(((major)<<XFS_DEV_BITSMINOR) \
+				    | (minor&XFS_DEV_MAXMIN)))
+
+#define XFS_DEV_TO_KDEVT(dev)	mk_kdev(XFS_DEV_MAJOR(dev),XFS_DEV_MINOR(dev))
 
 #endif	/* !__XFS_TYPES_H */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 9fcee5b06cdc..54ed6fec00a5 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -132,7 +132,7 @@ xfs_dir_ialloc(
 					   the inode. */
 	mode_t		mode,
 	nlink_t		nlink,
-	dev_t		rdev,
+	xfs_dev_t	rdev,
 	cred_t		*credp,
 	prid_t		prid,		/* project id */
 	int		okalloc,	/* ok to allocate new space */
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index db4da1029291..ac8f5b92ba0f 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -76,7 +76,7 @@ xfs_dir_ialloc(
 	struct xfs_inode	*dp,
 	mode_t			mode,
 	nlink_t			nlink,
-	dev_t			rdev,
+	xfs_dev_t		rdev,
 	struct cred		*credp,
 	prid_t			prid,
 	int			okalloc,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 297cfe03e845..263ee3310b50 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1571,6 +1571,39 @@ xfs_syncsub(
 	return XFS_ERROR(last_error);
 }
 
+STATIC void
+xfs_initialize_vnode(
+	bhv_desc_t	*bdp,
+	vnode_t		*vp,
+	bhv_desc_t	*inode_bhv,
+	int		unlock)
+{
+	xfs_inode_t	*ip = XFS_BHVTOI(inode_bhv);
+	struct inode	*inode = LINVFS_GET_IP(vp);
+
+	if (vp->v_fbhv == NULL) {
+		vp->v_vfsp = bhvtovfs(bdp);
+		bhv_desc_init(&(ip->i_bhv_desc), ip, vp, &xfs_vnodeops);
+		bhv_insert_initial(VN_BHV_HEAD(vp), &(ip->i_bhv_desc));
+	}
+
+	vp->v_type = IFTOVT(ip->i_d.di_mode);
+	/* Have we been called during the new inode create process,
+	 * in which case we are too early to fill in the linux inode.
+	 */
+	if (vp->v_type == VNON)
+		return;
+
+	xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
+
+	/* For new inodes we need to set the ops vectors,
+	 * and unlock the inode.
+	 */
+	if (unlock && (inode->i_state & I_NEW)) {
+		linvfs_set_inode_ops(inode);
+		unlock_new_inode(inode);
+	}
+}
 
 /*
  * xfs_vget - called by DMAPI to get vnode from file handle
@@ -1623,11 +1656,6 @@ xfs_vget(
 	inode = LINVFS_GET_IP((*vpp));
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-	error = linvfs_revalidate_core(inode, ATTR_COMM);
-	if (error) {
-		iput(inode);
-		return XFS_ERROR(error);
-	}
 	return 0;
 }
 
@@ -1640,6 +1668,7 @@ vfsops_t xfs_vfsops = {
 	.vfs_statvfs		= xfs_statvfs,
 	.vfs_sync		= xfs_sync,
 	.vfs_vget		= xfs_vget,
+	.vfs_init_vnode		= xfs_initialize_vnode,
 	.vfs_force_shutdown	= xfs_do_force_shutdown,
 #ifdef CONFIG_XFS_DMAPI
 	.vfs_dmapi_mount	= xfs_dm_mount,
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 46c42a2cd8ed..4f97135ba1ef 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -208,7 +208,7 @@ xfs_getattr(
 				(mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
 		}
 	} else {
-		vap->va_rdev = IRIX_DEV_TO_KDEVT(ip->i_df.if_u2.if_rdev);
+		vap->va_rdev = ip->i_df.if_u2.if_rdev;
 		vap->va_blksize = BLKDEV_IOSIZE;
 	}
 
@@ -1970,7 +1970,7 @@ xfs_create(
 	vnode_t			*vp=NULL;
 	xfs_trans_t		*tp;
 	xfs_mount_t		*mp;
-	dev_t			rdev;
+	xfs_dev_t		rdev;
 	int			error;
 	xfs_bmap_free_t		free_list;
 	xfs_fsblock_t		first_block;
@@ -2955,8 +2955,7 @@ xfs_mkdir(
 	xfs_inode_t		*cdp;	/* inode of created dir */
 	vnode_t			*cvp;	/* vnode of created dir */
 	xfs_trans_t		*tp;
-	dev_t			rdev;
-	mode_t			mode;
+	xfs_dev_t		rdev;
 	xfs_mount_t		*mp;
 	int			cancel_flags;
 	int			error;
@@ -3062,8 +3061,9 @@ xfs_mkdir(
 	 * create the directory inode.
 	 */
 	rdev = (vap->va_mask & AT_RDEV) ? vap->va_rdev : 0;
-	mode = IFDIR | (vap->va_mode & ~IFMT);
-	error = xfs_dir_ialloc(&tp, dp, mode, 2, rdev, credp, prid, resblks > 0,
+	error = xfs_dir_ialloc(&tp, dp, 
+			MAKEIMODE(vap->va_type,vap->va_mode), 2,
+			rdev, credp, prid, resblks > 0,
 		&cdp, NULL);
 	if (error) {
 		if (error == ENOSPC)
@@ -3521,7 +3521,7 @@ xfs_symlink(
 	xfs_inode_t		*ip;
 	int			error;
 	int			pathlen;
-	dev_t			rdev;
+	xfs_dev_t		rdev;
 	xfs_bmap_free_t		free_list;
 	xfs_fsblock_t		first_block;
 	boolean_t		dp_joined_to_trans;
-- 
cgit v1.2.3


From 9c8a5116320dbfb594dd8838b22c7e47eb99863d Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 02:44:01 +0200
Subject: XFS: fix xmount command in xfsidbg.

Modid: 2.5.x-xfs:slinx:128998a
---
 fs/xfs/xfsidbg.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
index 28ec0bd10af3..8e685d132902 100644
--- a/fs/xfs/xfsidbg.c
+++ b/fs/xfs/xfsidbg.c
@@ -4438,9 +4438,9 @@ xfsidbg_xmount(xfs_mount_t *mp)
 		mp->m_ail_gen, &mp->m_sb);
 	kdb_printf("sb_lock 0x%p sb_bp 0x%p dev 0x%x logdev 0x%x rtdev 0x%x\n",
 		&mp->m_sb_lock, mp->m_sb_bp,
-		mp->m_ddev_targp->pbr_dev,
-		mp->m_logdev_targp->pbr_dev,
-		mp->m_rtdev_targp->pbr_dev);
+		mp->m_ddev_targp ? mp->m_ddev_targp->pbr_dev : 0,
+		mp->m_logdev_targp ? mp->m_logdev_targp->pbr_dev : 0,
+		mp->m_rtdev_targp ? mp->m_rtdev_targp->pbr_dev : 0);
 	kdb_printf("bsize %d agfrotor %d agirotor %d ihash 0x%p ihsize %d\n",
 		mp->m_bsize, mp->m_agfrotor, mp->m_agirotor,
 		mp->m_ihash, mp->m_ihsize);
-- 
cgit v1.2.3


From 80a107cf24f37a3db263bfa21a29d142e0a16d04 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 02:44:50 +0200
Subject: XFS: Get xfs debug module back in sync with current pagebuf flags.

Modid: 2.5.x-xfs:slinx:129049a
---
 fs/xfs/xfsidbg.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
index 8e685d132902..53e2145be857 100644
--- a/fs/xfs/xfsidbg.c
+++ b/fs/xfs/xfsidbg.c
@@ -1700,18 +1700,19 @@ static int	kdbm_vn(
 /* pagebuf stuff */
 
 static char	*pb_flag_vals[] = {
-	"READ", "WRITE", "MAPPED", "PARTIAL",
-	"ASYNC", "NONE", "DELWRI", "FREED", "SYNC",
-	"MAPPABLE", "STALE", "FS_MANAGED", "RELEASE",
-	"LOCK", "TRYLOCK", "ALLOCATE", "FILE_ALLOCATE", "DONT_BLOCK",
-	"DIRECT", "LOCKABLE", "NEXT_KEY", "ENTER_PAGES",
-	"ALL_PAGES_MAPPED", "SOME_INVALID_PAGES", "ADDR_ALLOCATED",
-	"MEM_ALLOCATED", "GRIO", "FORCEIO", "SHUTDOWN",
-	NULL };
+/*  0 */ "READ", "WRITE", "MAPPED", "PARTIAL",
+/*  4 */ "ASYNC", "NONE", "DELWRI", "FREED", "SYNC",
+/*  9 */ "MAPPABLE", "STALE", "FS_MANAGED", "RELEASE",
+/* 13 */ "LOCK", "TRYLOCK", "ALLOCATE", "FILE_ALLOCATE", "DONT_BLOCK",
+/* 18 */ "DIRECT", "LOCKABLE", "PRIVATE_BH", "ENTER_PAGES",
+/* 22 */ "ALL_PAGES_MAPPED", "SOME_INVALID_PAGES", "ADDR_ALLOCATED",
+/* 25 */ "MEM_ALLOCATED", "INVALID26", "FORCEIO", "FLUSH",
+/* 29 */ "READ_AHEAD", "INVALID30", "FS_RESERVED",
+	 NULL };
 
 static char	*pbm_flag_vals[] = {
-	"EOF", "HOLE", "DELAY", "FLUSH_OVERLAPS",
-	"READAHEAD", "UNWRITTEN", "DONTALLOC", "NEW",
+	"EOF", "HOLE", "DELAY", "INVALID0x08",
+	"INVALID0x10", "UNWRITTEN", "INVALID0x40", "INVALID0x80",
 	NULL };
 
 
-- 
cgit v1.2.3


From 3cc814f12cc58a13504b46c69a4fbfb5fb2cf2c0 Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 02:45:46 +0200
Subject: XFS: fix parsing of extents by debug code

Modid: 2.5.x-xfs:slinx:129079a
---
 fs/xfs/xfsidbg.c | 69 +++++++++++++++-----------------------------------------
 1 file changed, 18 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
index 53e2145be857..ac8d98287b3c 100644
--- a/fs/xfs/xfsidbg.c
+++ b/fs/xfs/xfsidbg.c
@@ -2251,8 +2251,6 @@ static void xfs_btalloc(xfs_alloc_block_t *bt, int bsz);
 static void xfs_btbmap(xfs_bmbt_block_t *bt, int bsz);
 static void xfs_btino(xfs_inobt_block_t *bt, int bsz);
 static void xfs_buf_item_print(xfs_buf_log_item_t *blip, int summary);
-static void xfs_convert_extent(xfs_bmbt_rec_64_t *rp, xfs_dfiloff_t *op,
-			xfs_dfsbno_t *sp, xfs_dfilblks_t *cp, int *fp);
 static void xfs_dastate_path(xfs_da_state_path_t *p);
 static void xfs_dir2data(void *addr, int size);
 static void xfs_dir2leaf(xfs_dir2_leaf_t *leaf, int size);
@@ -2360,18 +2358,17 @@ xfs_btbmap(xfs_bmbt_block_t *bt, int bsz)
 	kdb_printf("rightsib %Lx\n", INT_GET(bt->bb_rightsib, ARCH_CONVERT));
 	if (INT_ISZERO(bt->bb_level, ARCH_CONVERT)) {
 		for (i = 1; i <= INT_GET(bt->bb_numrecs, ARCH_CONVERT); i++) {
-			xfs_bmbt_rec_64_t *r;
-			xfs_dfiloff_t o;
-			xfs_dfsbno_t s;
-			xfs_dfilblks_t c;
-			int fl;
+			xfs_bmbt_rec_t *r;
+			xfs_bmbt_irec_t	irec;
 
-			r = (xfs_bmbt_rec_64_t *)XFS_BTREE_REC_ADDR(bsz,
+			r = (xfs_bmbt_rec_t *)XFS_BTREE_REC_ADDR(bsz,
 				xfs_bmbt, bt, i, 0);
-			xfs_convert_extent(r, &o, &s, &c, &fl);
-			kdb_printf("rec %d startoff %Ld ", i, o);
-			kdb_printf("startblock %Lx ", s);
-			kdb_printf("blockcount %Ld flag %d\n", c, fl);
+
+			xfs_bmbt_get_all((xfs_bmbt_rec_t *)r, &irec);
+			kdb_printf("rec %d startoff %Ld startblock %Lx blockcount %Ld flag %d\n",
+				i, irec.br_startoff,
+				(__uint64_t)irec.br_startblock, 
+				irec.br_blockcount, irec.br_state);
 		}
 	} else {
 		int mxr;
@@ -2473,31 +2470,6 @@ xfs_buf_item_print(xfs_buf_log_item_t *blip, int summary)
 	kdb_printf("\n");
 }
 
-/*
- * Convert an external extent descriptor to internal form.
- */
-static void
-xfs_convert_extent(xfs_bmbt_rec_64_t *rp, xfs_dfiloff_t *op, xfs_dfsbno_t *sp,
-		   xfs_dfilblks_t *cp, int *fp)
-{
-	xfs_dfiloff_t o;
-	xfs_dfsbno_t s;
-	xfs_dfilblks_t c;
-	int flag;
-
-	flag = (int)((INT_GET(rp->l0, ARCH_CONVERT)) >> (64 - 1 ));
-	o = ((xfs_fileoff_t)INT_GET(rp->l0, ARCH_CONVERT) &
-			   (((__uint64_t)1 << ( 64 - 1	)) - 1) ) >> 9;
-	s = (((xfs_fsblock_t)INT_GET(rp->l0, ARCH_CONVERT) & (((__uint64_t)1 << ( 9 )) - 1) ) << 43) |
-			   (((xfs_fsblock_t)INT_GET(rp->l1, ARCH_CONVERT)) >> 21);
-	c = (xfs_filblks_t)(INT_GET(rp->l1, ARCH_CONVERT) & (((__uint64_t)1 << ( 21 )) - 1) );
-	*op = o;
-	*sp = s;
-	*cp = c;
-	*fp = flag;
-}
-
-
 /*
  * Print an xfs_da_state_path structure.
  */
@@ -2891,11 +2863,8 @@ static void
 xfs_xexlist_fork(xfs_inode_t *ip, int whichfork)
 {
 	int nextents, i;
-	xfs_dfiloff_t o;
-	xfs_dfsbno_t s;
-	xfs_dfilblks_t c;
-	int flag;
 	xfs_ifork_t *ifp;
+	xfs_bmbt_irec_t irec;
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (ifp->if_flags & XFS_IFEXTENTS) {
@@ -2903,12 +2872,12 @@ xfs_xexlist_fork(xfs_inode_t *ip, int whichfork)
 		kdb_printf("inode 0x%p %cf extents 0x%p nextents 0x%x\n",
 			ip, "da"[whichfork], ifp->if_u1.if_extents, nextents);
 		for (i = 0; i < nextents; i++) {
-			xfs_convert_extent(
-				(xfs_bmbt_rec_64_t *)&ifp->if_u1.if_extents[i],
-				&o, &s, &c, &flag);
+			xfs_bmbt_get_all(&ifp->if_u1.if_extents[i], &irec);
 			kdb_printf(
 		"%d: startoff %Ld startblock %s blockcount %Ld flag %d\n",
-				i, o, xfs_fmtfsblock(s, ip->i_mount), c, flag);
+			i, irec.br_startoff,
+			xfs_fmtfsblock(irec.br_startblock, ip->i_mount),
+			irec.br_blockcount, irec.br_state);
 		}
 	}
 }
@@ -3200,14 +3169,12 @@ xfsidbg_xbmalla(xfs_bmalloca_t *a)
 static void
 xfsidbg_xbrec(xfs_bmbt_rec_64_t *r)
 {
-	xfs_dfiloff_t o;
-	xfs_dfsbno_t s;
-	xfs_dfilblks_t c;
-	int flag;
+	xfs_bmbt_irec_t	irec;
 
-	xfs_convert_extent(r, &o, &s, &c, &flag);
+	xfs_bmbt_get_all((xfs_bmbt_rec_t *)r, &irec);
 	kdb_printf("startoff %Ld startblock %Lx blockcount %Ld flag %d\n",
-		o, s, c, flag);
+		irec.br_startoff, (__uint64_t)irec.br_startblock, 
+		irec.br_blockcount, irec.br_state);
 }
 
 /*
-- 
cgit v1.2.3


From 1e751b469647001771ae4df919035b94d2b9d74d Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 02:46:31 +0200
Subject: XFS: fix 2.5 specific code for small block size filesystems, there
 was a

Modid: 2.5.x-xfs:slinx:129109a
---
 fs/xfs/pagebuf/page_buf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c
index c98dc4637050..7fe0d77db5c6 100644
--- a/fs/xfs/pagebuf/page_buf.c
+++ b/fs/xfs/pagebuf/page_buf.c
@@ -545,9 +545,9 @@ _pagebuf_lookup_pages(
 		size -= nbytes;
 
 		if (!PageUptodate(page)) {
-			if ((blocksize == PAGE_CACHE_SIZE) &&
-			    (flags & PBF_READ)) {
-				pb->pb_locked = 1;
+			if (blocksize == PAGE_CACHE_SIZE) {
+				if (flags & PBF_READ)
+					pb->pb_locked = 1;
 				good_pages--;
 			} else if (!PagePrivate(page)) {
 				unsigned long i, range = (offset + nbytes) >> SECTOR_SHIFT;
-- 
cgit v1.2.3


From df4173311ed766d8c7b825438a31179382e8216f Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 02:47:20 +0200
Subject: XFS: Add missing newlines to cmn_err messages.

Modid: 2.5.x-xfs:slinx:129117a
---
 fs/xfs/support/debug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 795056094248..b86f415e2f70 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -89,10 +89,10 @@ cmn_err(register int level, char *fmt, ...)
 	switch (level) {
 	case CE_CONT:
 	case CE_WARN:
-		printk("%s", message);
+		printk("%s\n", message);
 		break;
 	case CE_DEBUG:
-		xdprintk("%s", message);
+		xdprintk("%s\n", message);
 		break;
 	default:
 		printk("%s\n", message);
-- 
cgit v1.2.3


From c21d6ee9a7689af3b11137e8f4578c6961ca0e65 Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 02:48:13 +0200
Subject: XFS: add some tracing calls in the read/write path

Modid: 2.5.x-xfs:slinx:129126a
---
 fs/xfs/linux/xfs_lrw.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_lrw.c b/fs/xfs/linux/xfs_lrw.c
index faf5fc7ba018..d1e92fbb0523 100644
--- a/fs/xfs/linux/xfs_lrw.c
+++ b/fs/xfs/linux/xfs_lrw.c
@@ -136,11 +136,14 @@ xfs_read(
 	xfs_fsize_t		n;
 	xfs_inode_t		*ip;
 	xfs_mount_t		*mp;
+	vnode_t			*vp;
 	unsigned long		seg;
 	int			direct = filp->f_flags & O_DIRECT;
 
 	ip = XFS_BHVTOI(bdp);
+	vp = BHV_TO_VNODE(bdp);
 	mp = ip->i_mount;
+	vn_trace_entry(vp, "xfs_read", (inst_t *)__return_address);
 
 	XFS_STATS_INC(xfsstats.xs_read_calls);
 
@@ -194,7 +197,7 @@ xfs_read(
 
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
-	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
+	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
 	    !(filp->f_mode & FINVIS)) {
 		int error;
 		vrwlock_t locktype = VRWLOCK_READ;
@@ -469,6 +472,7 @@ xfs_write(
 	XFS_STATS_INC(xfsstats.xs_write_calls);
 
 	vp = BHV_TO_VNODE(bdp);
+	vn_trace_entry(vp, "xfs_write", (inst_t *)__return_address);
 	xip = XFS_BHVTOI(bdp);
 
 	/* START copy & waste from filemap.c */
@@ -877,6 +881,8 @@ xfs_strategy(bhv_desc_t *bdp,
 	xfs_trans_t	*tp;
 
 	ip = XFS_BHVTOI(bdp);
+	vn_trace_entry(BHV_TO_VNODE(bdp), "xfs_strategy",
+					(inst_t *)__return_address);
 	io = &ip->i_iocore;
 	mp = ip->i_mount;
 	/* is_xfs = IO_IS_XFS(io); */
-- 
cgit v1.2.3


From f93d3d7d99e5911e702026a18b690031ce5bd270 Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 02:48:58 +0200
Subject: XFS: simplify the xfs flush and flushinvalidate calls down the what

Modid: 2.5.x-xfs:slinx:129128a
---
 fs/xfs/linux/xfs_fs_subr.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_fs_subr.c b/fs/xfs/linux/xfs_fs_subr.c
index 8d50bd04d718..eea74fce0050 100644
--- a/fs/xfs/linux/xfs_fs_subr.c
+++ b/fs/xfs/linux/xfs_fs_subr.c
@@ -135,7 +135,6 @@ fs_flushinval_pages(
 	struct inode	*ip = LINVFS_GET_IP(vp);
 
 	if (VN_CACHED(vp)) {
-		filemap_fdatawait(ip->i_mapping);
 		filemap_fdatawrite(ip->i_mapping);
 		filemap_fdatawait(ip->i_mapping);
 
@@ -159,7 +158,6 @@ fs_flush_pages(
 	struct inode	*ip = LINVFS_GET_IP(vp);
 
 	if (VN_CACHED(vp)) {
-		filemap_fdatawait(ip->i_mapping);
 		filemap_fdatawrite(ip->i_mapping);
 		filemap_fdatawait(ip->i_mapping);
 	}
-- 
cgit v1.2.3


From 5f7af2734ca70505ac74c5d0e13b3f42a0361759 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 02:50:46 +0200
Subject: XFS: Rearrange how xfs deals with read-only mounts vs. read-only
 devices.

Modid: 2.5.x-xfs:slinx:129120a
---
 fs/xfs/linux/xfs_lrw.c   | 38 +++++++-------------------------------
 fs/xfs/linux/xfs_lrw.h   |  4 +---
 fs/xfs/xfs_log_recover.c | 29 ++++++++++++++++++++---------
 fs/xfs/xfs_mount.c       | 11 ++++-------
 fs/xfs/xfs_qm.c          | 10 +---------
 5 files changed, 33 insertions(+), 59 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_lrw.c b/fs/xfs/linux/xfs_lrw.c
index d1e92fbb0523..057f8f7bb4da 100644
--- a/fs/xfs/linux/xfs_lrw.c
+++ b/fs/xfs/linux/xfs_lrw.c
@@ -1836,45 +1836,21 @@ XFS_log_write_unmount_ro(bhv_desc_t	*bdp)
 }
 
 /*
- * In these two situations we disregard the readonly mount flag and
- * temporarily enable writes (we must, to ensure metadata integrity).
+ * If the underlying (log or data) device is readonly, there are some
+ * operations that cannot proceed.
  */
-STATIC int
-xfs_is_read_only(xfs_mount_t *mp)
+int
+xfs_dev_is_read_only(xfs_mount_t *mp, char *message)
 {
 	if (bdev_read_only(mp->m_ddev_targp->pbr_bdev) ||
 	    bdev_read_only(mp->m_logdev_targp->pbr_bdev)) {
+		cmn_err(CE_NOTE,
+			"XFS: %s required on read-only device.", message);
 		cmn_err(CE_NOTE,
 			"XFS: write access unavailable, cannot proceed.");
 		return EROFS;
 	}
-	cmn_err(CE_NOTE,
-		"XFS: write access will be enabled during mount.");
-	XFS_MTOVFS(mp)->vfs_flag &= ~VFS_RDONLY;
-	return 0;
-}
-
-int
-xfs_recover_read_only(xlog_t *log)
-{
-	cmn_err(CE_NOTE, "XFS: WARNING: "
-		"recovery required on readonly filesystem.");
-	return xfs_is_read_only(log->l_mp);
-}
-
-int
-xfs_quotacheck_read_only(xfs_mount_t *mp)
-{
-	cmn_err(CE_NOTE, "XFS: WARNING: "
-		"quotacheck required on readonly filesystem.");
-	return xfs_is_read_only(mp);
-}
 
-int
-xfs_quotaino_create_read_only(xfs_mount_t *mp)
-{
-        cmn_err(CE_NOTE, "XFS: WARNING: "
-                "Quota inode creation required on readonly filesystem.");
-        return xfs_is_read_only(mp);
+	return 0;
 }
 
diff --git a/fs/xfs/linux/xfs_lrw.h b/fs/xfs/linux/xfs_lrw.h
index 4ea9c9f4bfb0..6f2ef2d4fa2a 100644
--- a/fs/xfs/linux/xfs_lrw.h
+++ b/fs/xfs/linux/xfs_lrw.h
@@ -62,9 +62,7 @@ extern ssize_t xfs_write (
 	loff_t			*offp,
 	struct cred		*credp);
 
-extern int xfs_recover_read_only (xlog_t *);
-extern int xfs_quotacheck_read_only (xfs_mount_t *);
-extern int xfs_quotaino_create_read_only (xfs_mount_t *);
+extern int xfs_dev_is_read_only(xfs_mount_t *, char *);
 
 extern void XFS_log_write_unmount_ro (bhv_desc_t *);
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8d1676e1d157..7c18ed89857d 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -873,9 +873,19 @@ xlog_find_tail(xlog_t  *log,
 	 * overwrite the unmount record after a clean unmount.
 	 *
 	 * Do this only if we are going to recover the filesystem
+	 *
+	 * NOTE: This used to say "if (!readonly)"
+	 * However on Linux, we can & do recover a read-only filesystem.
+	 * We only skip recovery if NORECOVERY is specified on mount,
+	 * in which case we would not be here.
+	 *
+	 * But... if the -device- itself is readonly, just skip this.
+	 * We can't recover this device anyway, so it won't matter.
 	 */
-	if (!readonly)
+
+	if (!bdev_read_only(log->l_mp->m_logdev_targp->pbr_bdev)) {
 		error = xlog_clear_stale_blocks(log, tail_lsn);
+	}
 #endif
 
 bread_err:
@@ -3521,17 +3531,20 @@ xlog_recover(xlog_t *log, int readonly)
 		 * error message.
 		 * ...but this is no longer true.  Now, unless you specify
 		 * NORECOVERY (in which case this function would never be
-		 * called), it enables read-write access long enough to do
-		 * recovery.
+		 * called), we just go ahead and recover.  We do this all
+		 * under the vfs layer, so we can get away with it unless
+		 * the device itself is read-only, in which case we fail.
 		 */
-		if (readonly) {
 #ifdef __KERNEL__
-			if ((error = xfs_recover_read_only(log)))
-				return error;
+		/* The callee's message format already appends "required". */
+		error = xfs_dev_is_read_only(log->l_mp, "recovery");
+		if (error)
+			return error;
 #else
+		if (readonly) {
 			return ENOSPC;
-#endif
 		}
+#endif
 
 #ifdef __KERNEL__
 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
@@ -3548,8 +3561,6 @@ xlog_recover(xlog_t *log, int readonly)
 #endif
 		error = xlog_do_recover(log, head_blk, tail_blk);
 		log->l_flags |= XLOG_RECOVERY_NEEDED;
-		if (readonly)
-			XFS_MTOVFS(log->l_mp)->vfs_flag |= VFS_RDONLY;
 	}
 	return error;
 }	/* xlog_recover */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4dccca3c8b34..74b345fd3669 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -937,8 +937,7 @@ xfs_mountfs(
 
 	if (((quotaondisk && !XFS_IS_QUOTA_ON(mp)) ||
 	      (!quotaondisk && XFS_IS_QUOTA_ON(mp))) &&
-	    (bdev_read_only(mp->m_ddev_targp->pbr_bdev) ||
-	     bdev_read_only(mp->m_logdev_targp->pbr_bdev))) {
+	      xfs_dev_is_read_only(mp, "changing quota state")) {
 		cmn_err(CE_WARN,
 			"XFS: device %s is read-only, cannot change "
 			"quota state.  Please mount with%s quota option.",
@@ -1030,14 +1029,12 @@ xfs_mountfs(
 	if (needquotamount) {
 		ASSERT(mp->m_qflags == 0);
 		mp->m_qflags = quotaflags;
-		rootqcheck = ((XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) &&
-				mp->m_dev == rootdev && needquotacheck);
-		if (rootqcheck && (error = xfs_quotacheck_read_only(mp)))
+		rootqcheck = (mp->m_dev == rootdev && needquotacheck);
+		if (rootqcheck && (error = xfs_dev_is_read_only(mp,
+					"quotacheck")))
 			goto error2;
 		if (xfs_qm_mount_quotas(mp))
 			xfs_mount_reset_sbqflags(mp);
-		if (rootqcheck)
-			XFS_MTOVFS(mp)->vfs_flag |= VFS_RDONLY;
 	}
 
 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 3b87ef6c7d00..afeb5cce9b0c 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -2039,17 +2039,9 @@ xfs_qm_init_quotainos(
 	/*
 	 * Create the two inodes, if they don't exist already. The changes
 	 * made above will get added to a transaction and logged in one of
-	 * the qino_alloc calls below.  If the device is readonly,
-	 * temporarily switch to read-write to do this.
+	 * the qino_alloc calls below.
 	 */
 
-	if (readonly &&
-	    ((XFS_IS_UQUOTA_ON(mp) && uip == NULL) || 
-	     (XFS_IS_GQUOTA_ON(mp) && gip == NULL))) {
-		if ((error = xfs_quotaino_create_read_only(mp)))
-			goto error;
-	}
-
 	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
 		if ((error = xfs_qm_qino_alloc(mp, &uip,
 					      sbflags | XFS_SB_UQUOTINO,
-- 
cgit v1.2.3


From 5f612740ed055d85840d3fe3e5c1fc683b9ac768 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 02:51:45 +0200
Subject: XFS: Fix sysctl values, add PB_CLEAR_OWNER debugging line

Modid: 2.5.x-xfs:slinx:129132a
---
 fs/xfs/pagebuf/page_buf.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c
index 7fe0d77db5c6..72bdfdfb27f2 100644
--- a/fs/xfs/pagebuf/page_buf.c
+++ b/fs/xfs/pagebuf/page_buf.c
@@ -985,6 +985,7 @@ pagebuf_get_no_daddr(
 	}
 	/* otherwise pagebuf_free just ignores it */
 	pb->pb_flags |= _PBF_MEM_ALLOCATED;
+	PB_CLEAR_OWNER(pb);
 	up(&PBP(pb)->pb_sema);	/* Return unlocked pagebuf */
 
 	PB_TRACE(pb, PB_TRACE_REC(no_daddr), rmem);
@@ -1926,14 +1927,14 @@ STATIC ctl_table pagebuf_table[] = {
 	sizeof(ulong), 0644, NULL, &proc_doulongvec_ms_jiffies_minmax,
 	&sysctl_intvec, NULL, &pagebuf_min[1], &pagebuf_max[1]},
 
-	{PB_STATS_CLEAR, "stats_clear", &pb_params.data[3],
+	{PB_STATS_CLEAR, "stats_clear", &pb_params.data[2],
 	sizeof(ulong), 0644, NULL, &pb_stats_clear_handler,
-	&sysctl_intvec, NULL, &pagebuf_min[3], &pagebuf_max[3]},
+	&sysctl_intvec, NULL, &pagebuf_min[2], &pagebuf_max[2]},
 
 #ifdef PAGEBUF_TRACE
-	{PB_DEBUG, "debug", &pb_params.data[4],
+	{PB_DEBUG, "debug", &pb_params.data[3],
 	sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax,
-	&sysctl_intvec, NULL, &pagebuf_min[4], &pagebuf_max[4]},
+	&sysctl_intvec, NULL, &pagebuf_min[3], &pagebuf_max[3]},
 #endif
 	{0}
 };
-- 
cgit v1.2.3


From 10c2c79040279403e50571fd68126a0f0a593755 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 02:52:26 +0200
Subject: XFS: Check rtdev as well when testing for read-only devices

Modid: 2.5.x-xfs:slinx:129155a
---
 fs/xfs/linux/xfs_lrw.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_lrw.c b/fs/xfs/linux/xfs_lrw.c
index 057f8f7bb4da..a96d71575fa1 100644
--- a/fs/xfs/linux/xfs_lrw.c
+++ b/fs/xfs/linux/xfs_lrw.c
@@ -1843,7 +1843,8 @@ int
 xfs_dev_is_read_only(xfs_mount_t *mp, char *message)
 {
 	if (bdev_read_only(mp->m_ddev_targp->pbr_bdev) ||
-	    bdev_read_only(mp->m_logdev_targp->pbr_bdev)) {
+	    bdev_read_only(mp->m_logdev_targp->pbr_bdev) ||
+	   (mp->m_rtdev_targp && bdev_read_only(mp->m_rtdev_targp->pbr_bdev))) {
 		cmn_err(CE_NOTE,
 			"XFS: %s required on read-only device.", message);
 		cmn_err(CE_NOTE,
-- 
cgit v1.2.3


From 2ceb1958721b967b2af07c6ef2ffca8913b98649 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 02:53:17 +0200
Subject: XFS: Export xfs_bmbt_get_all for the last fix in xfsidbg.c

Modid: 2.5.x-xfs:slinx:129160a
---
 fs/xfs/linux/xfs_globals.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_globals.c b/fs/xfs/linux/xfs_globals.c
index 54a4343289f1..daadc37996e0 100644
--- a/fs/xfs/linux/xfs_globals.c
+++ b/fs/xfs/linux/xfs_globals.c
@@ -69,3 +69,5 @@ mutex_t		xfs_Gqm_lock;
 EXPORT_SYMBOL(xfs_Gqm);
 EXPORT_SYMBOL(xfs_next_bit);
 EXPORT_SYMBOL(xfs_contig_bits);
+EXPORT_SYMBOL(xfs_bmbt_get_all);
+
-- 
cgit v1.2.3


From a6a4f4e00f15b7491edbb47b1266b5fef8ebf829 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 02:54:17 +0200
Subject: XFS: Remove unused pagebuf flags

Modid: 2.5.x-xfs:slinx:129235a
---
 fs/xfs/pagebuf/page_buf.c | 12 +-----------
 fs/xfs/pagebuf/page_buf.h | 26 +++++++++-----------------
 fs/xfs/xfsidbg.c          | 16 ++++++++--------
 3 files changed, 18 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c
index 72bdfdfb27f2..b066bc7878df 100644
--- a/fs/xfs/pagebuf/page_buf.c
+++ b/fs/xfs/pagebuf/page_buf.c
@@ -305,8 +305,7 @@ _pagebuf_initialize(
 	/*
 	 * We don't want certain flags to appear in pb->pb_flags.
 	 */
-	flags &= ~(PBF_LOCK|PBF_ENTER_PAGES|PBF_MAPPED);
-	flags &= ~(PBF_DONT_BLOCK|PBF_READ_AHEAD);
+	flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);
 
 	pb_tracking_get(pb);
 
@@ -717,7 +716,6 @@ found:
 				PBF_MAPPED | \
 				_PBF_LOCKABLE | \
 				_PBF_ALL_PAGES_MAPPED | \
-				_PBF_SOME_INVALID_PAGES | \
 				_PBF_ADDR_ALLOCATED | \
 				_PBF_MEM_ALLOCATED;
 	PB_TRACE(pb, PB_TRACE_REC(got_lk), 0);
@@ -832,19 +830,11 @@ pagebuf_lookup(
 	int			flags)
 {
 	page_buf_t		*pb = NULL;
-	int			status;
 
 	flags |= _PBF_PRIVATE_BH;
 	pb = pagebuf_allocate(flags);
 	if (pb) {
 		_pagebuf_initialize(pb, target, ioff, isize, flags);
-		if (flags & PBF_ENTER_PAGES) {
-			status = _pagebuf_lookup_pages(pb, &inode->i_data, 0);
-			if (status != 0) {
-				pagebuf_free(pb);
-				return (NULL);
-			}
-		}
 	}
 	return pb;
 }
diff --git a/fs/xfs/pagebuf/page_buf.h b/fs/xfs/pagebuf/page_buf.h
index f62d8d419fc6..ff240fefd32c 100644
--- a/fs/xfs/pagebuf/page_buf.h
+++ b/fs/xfs/pagebuf/page_buf.h
@@ -100,35 +100,27 @@ typedef enum page_buf_flags_e {		/* pb_flags values */
 	PBF_MAPPABLE = (1 << 9),/* use directly-addressable pages	   */
 	PBF_STALE = (1 << 10),	/* buffer has been staled, do not find it  */
 	PBF_FS_MANAGED = (1 << 11), /* filesystem controls freeing memory  */
-	PBF_RELEASE = (1 << 12),/* buffer to be released after I/O is done */
 
 	/* flags used only as arguments to access routines */
 	PBF_LOCK = (1 << 13),	/* lock requested			   */
 	PBF_TRYLOCK = (1 << 14), /* lock requested, but do not wait	   */
-	PBF_ALLOCATE = (1 << 15), /* allocate all pages		  (UNUSED) */
-	PBF_FILE_ALLOCATE = (1 << 16), /* allocate all file space	   */
-	PBF_DONT_BLOCK = (1 << 17), /* do not block in current thread	   */
-	PBF_DIRECT = (1 << 18),	  /* direct I/O desired			   */
-	PBF_ENTER_PAGES = (1 << 21), /* create invalid pages for all	   */
-				/* pages in the range of the buffer	   */
-				/* not already associated with buffer	   */
+	PBF_FILE_ALLOCATE = (1 << 15), /* allocate all file space	   */
+	PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread	   */
+	PBF_DIRECT = (1 << 17),	  /* direct I/O desired			   */
 
 	/* flags used only internally */
 	_PBF_LOCKABLE = (1 << 19), /* page_buf_t may be locked		   */
 	_PBF_PRIVATE_BH = (1 << 20), /* do not use public buffer heads	   */
-	_PBF_ALL_PAGES_MAPPED = (1 << 22),
+	_PBF_ALL_PAGES_MAPPED = (1 << 21),
 				/* all pages in rage are mapped		   */
-	_PBF_SOME_INVALID_PAGES = (1 << 23),
-				/* some mapped pages are not valid	   */
-	_PBF_ADDR_ALLOCATED = (1 << 24),
+	_PBF_ADDR_ALLOCATED = (1 << 22),
 				/* pb_addr space was allocated		   */
-	_PBF_MEM_ALLOCATED = (1 << 25),
+	_PBF_MEM_ALLOCATED = (1 << 23),
 				/* pb_mem and underlying pages allocated   */
 
-	PBF_FORCEIO = (1 << 27),
-	PBF_FLUSH = (1 << 28),	/* flush disk write cache */
-	PBF_READ_AHEAD = (1 << 29),
-	PBF_FS_RESERVED_3 = (1 << 31)	/* reserved (XFS use: XFS_B_STALE) */
+	PBF_FORCEIO = (1 << 24),
+	PBF_FLUSH = (1 << 25),	/* flush disk write cache */
+	PBF_READ_AHEAD = (1 << 26),
 
 } page_buf_flags_t;
 
diff --git a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
index ac8d98287b3c..579eedcef1f3 100644
--- a/fs/xfs/xfsidbg.c
+++ b/fs/xfs/xfsidbg.c
@@ -1700,14 +1700,14 @@ static int	kdbm_vn(
 /* pagebuf stuff */
 
 static char	*pb_flag_vals[] = {
-/*  0 */ "READ", "WRITE", "MAPPED", "PARTIAL",
-/*  4 */ "ASYNC", "NONE", "DELWRI", "FREED", "SYNC",
-/*  9 */ "MAPPABLE", "STALE", "FS_MANAGED", "RELEASE",
-/* 13 */ "LOCK", "TRYLOCK", "ALLOCATE", "FILE_ALLOCATE", "DONT_BLOCK",
-/* 18 */ "DIRECT", "LOCKABLE", "PRIVATE_BH", "ENTER_PAGES",
-/* 22 */ "ALL_PAGES_MAPPED", "SOME_INVALID_PAGES", "ADDR_ALLOCATED",
-/* 25 */ "MEM_ALLOCATED", "INVALID26", "FORCEIO", "FLUSH",
-/* 29 */ "READ_AHEAD", "INVALID30", "FS_RESERVED",
+/*  0 */ "READ", "WRITE", "MAPPED", "PARTIAL", "ASYNC",
+/*  5 */ "NONE", "DELWRI", "FREED", "SYNC", "MAPPABLE",
+/* 10 */ "STALE", "FS_MANAGED", "INVALID12", "LOCK", "TRYLOCK",
+/* 15 */ "FILE_ALLOCATE", "DONT_BLOCK", "DIRECT", "INVALID18", "LOCKABLE",
+/* 20 */ "PRIVATE_BH", "ALL_PAGES_MAPPED", "ADDR_ALLOCATED", "MEM_ALLOCATED",
+	 "FORCEIO",
+/* 25 */ "FLUSH", "READ_AHEAD", "INVALID27", "INVALID28", "INVALID29",
+/* 30 */ "INVALID30", "INVALID31",
 	 NULL };
 
 static char	*pbm_flag_vals[] = {
-- 
cgit v1.2.3


From 0e4c68e4d1a5296f3091a0ef6901041e98e85cd6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 02:57:47 +0200
Subject: XFS: Remove leftovers of long-dead iocore methods

Modid: 2.5.x-xfs:slinx:129234a
---
 fs/xfs/linux/xfs_globals.c |   6 --
 fs/xfs/linux/xfs_globals.h |   1 -
 fs/xfs/linux/xfs_iops.c    |   9 ++-
 fs/xfs/linux/xfs_linux.h   |   4 ++
 fs/xfs/linux/xfs_super.c   | 148 +++++++++++++++++++++++----------------------
 fs/xfs/linux/xfs_sysctl.c  |  61 ++++++++++++-------
 fs/xfs/linux/xfs_sysctl.h  |  15 +++--
 fs/xfs/xfs_clnt.h          |   3 +-
 fs/xfs/xfs_inode.c         |  13 ++--
 fs/xfs/xfs_inode.h         |  17 ------
 fs/xfs/xfs_mount.h         |  10 +--
 fs/xfs/xfs_rw.h            |   1 -
 fs/xfs/xfs_vfsops.c        |   3 -
 fs/xfs/xfsidbg.c           |   3 +-
 14 files changed, 143 insertions(+), 151 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_globals.c b/fs/xfs/linux/xfs_globals.c
index daadc37996e0..e831422a7f7b 100644
--- a/fs/xfs/linux/xfs_globals.c
+++ b/fs/xfs/linux/xfs_globals.c
@@ -40,12 +40,6 @@
 uint64_t	xfs_panic_mask;		/* set to cause more panics */
 unsigned long	xfs_physmem;
 
-/*
- * restricted_chown = 1	 bsd style chown(2), only super-user can give away files
- * restricted_chown = 0	 sysV style chown(2), non super-user can give away files
- */
-int		restricted_chown = 1;
-
 /*
  * Used to serialize atomicIncWithWrap.
  */
diff --git a/fs/xfs/linux/xfs_globals.h b/fs/xfs/linux/xfs_globals.h
index 943e029f1d42..07c9856b1353 100644
--- a/fs/xfs/linux/xfs_globals.h
+++ b/fs/xfs/linux/xfs_globals.h
@@ -39,7 +39,6 @@
 
 extern uint64_t xfs_panic_mask;		/* set to cause more panics */
 
-extern int	restricted_chown;
 extern unsigned long	xfs_physmem;
 
 extern struct cred *sys_cred;
diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c
index be6b8b5306fa..9b46e62dbc14 100644
--- a/fs/xfs/linux/xfs_iops.c
+++ b/fs/xfs/linux/xfs_iops.c
@@ -261,12 +261,11 @@ linvfs_symlink(
 
 	bzero(&va, sizeof(va));
 	va.va_type = VLNK;
-	va.va_mode = 0777 & ~current->fs->umask;
-	va.va_mask = AT_TYPE|AT_MODE; /* AT_PROJID? */
+	va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO;
+	va.va_mask = AT_TYPE|AT_MODE;
 
 	error = 0;
-	VOP_SYMLINK(dvp, dentry, &va, (char *)symname,
-							&cvp, NULL, error);
+	VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
 	if (!error) {
 		ASSERT(cvp);
 		ASSERT(cvp->v_type == VLNK);
@@ -364,7 +363,7 @@ linvfs_readlink(
 }
 
 /*
- * careful here - this function can get called recusively, so
+ * careful here - this function can get called recursively, so
  * we need to be very careful about how much stack we use.
  * uio is kmalloced for this reason...
  */
diff --git a/fs/xfs/linux/xfs_linux.h b/fs/xfs/linux/xfs_linux.h
index 7def3bb302b8..49bb2095c10c 100644
--- a/fs/xfs/linux/xfs_linux.h
+++ b/fs/xfs/linux/xfs_linux.h
@@ -67,6 +67,10 @@
 #define STATIC static
 #endif
 
+#define restricted_chown	xfs_params.restrict_chown
+#define irix_sgid_inherit	xfs_params.sgid_inherit
+#define irix_symlink_mode	xfs_params.symlink_mode
+
 typedef struct xfs_dirent {		/* data from readdir() */
 	xfs_ino_t	d_ino;		/* inode number of entry */
 	xfs_off_t	d_off;		/* offset of disk directory entry */
diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c
index 0d2bc0e31728..5c32e8c489fb 100644
--- a/fs/xfs/linux/xfs_super.c
+++ b/fs/xfs/linux/xfs_super.c
@@ -92,8 +92,6 @@ STATIC struct export_operations linvfs_export_ops;
 #define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
 #define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
 #define MNTOPT_NORECOVERY "norecovery"	/* don't run XFS recovery */
-#define MNTOPT_OSYNCISDSYNC "osyncisdsync" /* o_sync == o_dsync on this fs */
-					   /* (this is now the default!) */
 #define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
 #define MNTOPT_QUOTA	"quota"		/* disk quotas */
 #define MNTOPT_MRQUOTA	"mrquota"	/* don't turnoff if SB has quotas on */
@@ -104,7 +102,6 @@ STATIC struct export_operations linvfs_export_ops;
 #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
 #define MNTOPT_QUOTANOENF  "qnoenforce" /* same as uqnoenforce */
 #define MNTOPT_NOUUID	"nouuid"	/* Ignore FS uuid */
-#define MNTOPT_IRIXSGID "irixsgid"	/* Irix-style sgid inheritance */
 #define MNTOPT_NOLOGFLUSH  "nologflush"	/* Don't use hard flushes in
 					   log writing */
 #define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
@@ -175,9 +172,6 @@ xfs_parseargs(
 			args->iosizelog = (uint8_t) iosize;
 		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
 			args->flags |= XFSMNT_WSYNC;
-		} else if (!strcmp(this_char, MNTOPT_OSYNCISDSYNC)) {
-			/* no-op, this is now the default */
-printk("XFS: osyncisdsync is now the default, and will soon be deprecated.\n");
 		} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
 			args->flags |= XFSMNT_OSYNCISOSYNC;
 		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
@@ -212,10 +206,13 @@ printk("XFS: osyncisdsync is now the default, and will soon be deprecated.\n");
 			dswidth = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
 			args->flags |= XFSMNT_NOUUID;
-		} else if (!strcmp(this_char, MNTOPT_IRIXSGID)) {
-			args->flags |= XFSMNT_IRIXSGID;
 		} else if (!strcmp(this_char, MNTOPT_NOLOGFLUSH)) {
 			args->flags |= XFSMNT_NOLOGFLUSH;
+		} else if (!strcmp(this_char, "osyncisdsync")) {
+			/* no-op, this is now the default */
+printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
+		} else if (!strcmp(this_char, "irixsgid")) {
+printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n");
 		} else {
 			printk("XFS: unknown mount option [%s].\n", this_char);
 			return rval;
@@ -264,6 +261,74 @@ printk("XFS: osyncisdsync is now the default, and will soon be deprecated.\n");
 	return 0;
 }
 
+STATIC int
+xfs_showargs(
+	struct vfs		*vfsp,
+	struct seq_file		*m)
+{
+	static struct proc_xfs_info {
+		int	flag;
+		char	*str;
+	} xfs_info[] = {
+		/* the few simple ones we can get from the mount struct */
+		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
+		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
+		{ XFS_MOUNT_OSYNCISOSYNC,	"," MNTOPT_OSYNCISOSYNC },
+		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
+		{ 0, NULL }
+	};
+	struct proc_xfs_info	*xfs_infop;
+	struct xfs_mount	*mp = XFS_BHVTOM(vfsp->vfs_fbhv);
+
+	for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
+		if (mp->m_flags & xfs_infop->flag)
+			seq_puts(m, xfs_infop->str);
+	}
+
+	if (mp->m_qflags & XFS_UQUOTA_ACCT) {
+		(mp->m_qflags & XFS_UQUOTA_ENFD) ?
+			seq_puts(m, "," MNTOPT_UQUOTA) :
+			seq_puts(m, "," MNTOPT_UQUOTANOENF);
+	}
+
+	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+		(mp->m_qflags & XFS_GQUOTA_ENFD) ?
+			seq_puts(m, "," MNTOPT_GQUOTA) :
+			seq_puts(m, "," MNTOPT_GQUOTANOENF);
+	}
+
+	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+		seq_printf(m, "," MNTOPT_BIOSIZE "=%d", mp->m_writeio_log);
+
+	if (mp->m_logbufs > 0)
+		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
+
+	if (mp->m_logbsize > 0)
+		seq_printf(m, "," MNTOPT_LOGBSIZE "=%d", mp->m_logbsize);
+
+	if (mp->m_ddev_targp->pbr_dev != mp->m_logdev_targp->pbr_dev)
+		seq_printf(m, "," MNTOPT_LOGDEV "=%s",
+				bdevname(mp->m_logdev_targp->pbr_bdev));
+
+	if (mp->m_rtdev_targp &&
+	    mp->m_ddev_targp->pbr_dev != mp->m_rtdev_targp->pbr_dev)
+		seq_printf(m, "," MNTOPT_RTDEV "=%s",
+				bdevname(mp->m_rtdev_targp->pbr_bdev));
+
+	if (mp->m_dalign > 0)
+		seq_printf(m, "," MNTOPT_SUNIT "=%d",
+				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
+
+	if (mp->m_swidth > 0)
+		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
+
+	if (vfsp->vfs_flag & VFS_DMI)
+		seq_puts(m, "," MNTOPT_DMAPI);
+
+	return 0;
+}
+
 
 STATIC kmem_cache_t * linvfs_inode_cachep;
 
@@ -695,72 +760,9 @@ linvfs_show_options(
 	struct seq_file		*m,
 	struct vfsmount		*mnt)
 {
-	vfs_t			*vfsp;
-	xfs_mount_t		*mp;
-	static struct proc_xfs_info {
-		int	flag;
-		char	*str;
-	} xfs_info[] = {
-		/* the few simple ones we can get from the mount struct */
-		{ XFS_MOUNT_NOALIGN,		",noalign" },
-		{ XFS_MOUNT_NORECOVERY,		",norecovery" },
-		{ XFS_MOUNT_OSYNCISOSYNC,	",osyncisosync" },
-		{ XFS_MOUNT_NOUUID,		",nouuid" },
-		{ XFS_MOUNT_IRIXSGID,		",irixsgid" },
-		{ 0, NULL }
-	};
-	struct proc_xfs_info	*xfs_infop;
-
-	vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
-	mp = XFS_BHVTOM(vfsp->vfs_fbhv);
-
-	for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
-		if (mp->m_flags & xfs_infop->flag)
-			seq_puts(m, xfs_infop->str);
-	}
-
-	if (mp->m_qflags & XFS_UQUOTA_ACCT) {
-		seq_puts(m, ",uquota");
-		if (!(mp->m_qflags & XFS_UQUOTA_ENFD))
-			seq_puts(m, ",uqnoenforce");
-	}
-
-	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-		seq_puts(m, ",gquota");
-		if (!(mp->m_qflags & XFS_GQUOTA_ENFD))
-			seq_puts(m, ",gqnoenforce");
-	}
-
-	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-		seq_printf(m, ",biosize=%d", mp->m_writeio_log);
-
-	if (mp->m_logbufs > 0)
-		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
-
-	if (mp->m_logbsize > 0)
-		seq_printf(m, ",logbsize=%d", mp->m_logbsize);
-
-	if (mp->m_ddev_targp->pbr_dev != mp->m_logdev_targp->pbr_dev)
-		seq_printf(m, ",logdev=%s",
-				bdevname(mp->m_logdev_targp->pbr_bdev));
+	vfs_t			*vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
 
-	if (mp->m_rtdev_targp &&
-	    mp->m_ddev_targp->pbr_dev != mp->m_rtdev_targp->pbr_dev)
-		seq_printf(m, ",rtdev=%s",
-				bdevname(mp->m_rtdev_targp->pbr_bdev));
-
-	if (mp->m_dalign > 0)
-		seq_printf(m, ",sunit=%d",
-				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
-
-	if (mp->m_swidth > 0)
-		seq_printf(m, ",swidth=%d",
-				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
-
-	if (vfsp->vfs_flag & VFS_DMI)
-		seq_puts(m, ",dmapi");
-
-	return 0;
+	return xfs_showargs(vfsp, m);
 }
 
 STATIC struct super_operations linvfs_sops = {
diff --git a/fs/xfs/linux/xfs_sysctl.c b/fs/xfs/linux/xfs_sysctl.c
index 840810b33f27..4229b8975e05 100644
--- a/fs/xfs/linux/xfs_sysctl.c
+++ b/fs/xfs/linux/xfs_sysctl.c
@@ -35,30 +35,34 @@
 #include <linux/proc_fs.h>
 
 /*
- * Tunable xfs parameters
+ * Tunable XFS parameters
  */
 
 extern struct xfsstats xfsstats;
 
-unsigned long xfs_min[XFS_PARAM] = {			 0,			 0, 0 };
-unsigned long xfs_max[XFS_PARAM] = { XFS_REFCACHE_SIZE_MAX,  XFS_REFCACHE_SIZE_MAX, 1 };
+STATIC ulong xfs_min[XFS_PARAM] = { \
+			    0,			    0, 0, 0, 0, 0 };
+STATIC ulong xfs_max[XFS_PARAM] = { \
+	XFS_REFCACHE_SIZE_MAX,  XFS_REFCACHE_SIZE_MAX, 1, 1, 1, 1 };
 
-xfs_param_t xfs_params = { 128, 32, 0 };
+xfs_param_t xfs_params = { 128, 32, 0, 1, 0, 0 };
 
 static struct ctl_table_header *xfs_table_header;
 
-/* proc handlers */
 
-extern void xfs_refcache_resize(int xfs_refcache_new_size);
+/* Custom proc handlers */
 
-static int
-xfs_refcache_resize_proc_handler(ctl_table *ctl, int write, struct file * filp,
-		       void *buffer, size_t *lenp)
+STATIC int
+xfs_refcache_resize_proc_handler(
+	ctl_table	*ctl,
+	int		write,
+	struct file	*filp,
+	void		*buffer,
+	size_t		*lenp)
 {
-	int	ret;
-	int	*valp = ctl->data;
-	int	xfs_refcache_new_size;
-	int	xfs_refcache_old_size = *valp;
+	int		ret, *valp = ctl->data;
+	int		xfs_refcache_new_size;
+	int		xfs_refcache_old_size = *valp;
 
 	ret = proc_doulongvec_minmax(ctl, write, filp, buffer, lenp);
 	xfs_refcache_new_size = *valp;
@@ -73,12 +77,15 @@ xfs_refcache_resize_proc_handler(ctl_table *ctl, int write, struct file * filp,
 	return ret;
 }
 
-static int
-xfs_stats_clear_proc_handler(ctl_table *ctl, int write, struct file * filp,
-		       void *buffer, size_t *lenp)
+STATIC int
+xfs_stats_clear_proc_handler(
+	ctl_table	*ctl,
+	int		write,
+	struct file	*filp,
+	void		*buffer,
+	size_t		*lenp)
 {
-	int		ret;
-	int		*valp = ctl->data;
+	int		ret, *valp = ctl->data;
 	__uint32_t	vn_active;
 
 	ret = proc_doulongvec_minmax(ctl, write, filp, buffer, lenp);
@@ -95,7 +102,7 @@ xfs_stats_clear_proc_handler(ctl_table *ctl, int write, struct file * filp,
 	return ret;
 }
 
-static ctl_table xfs_table[] = {
+STATIC ctl_table xfs_table[] = {
 	{XFS_REFCACHE_SIZE, "refcache_size", &xfs_params.refcache_size,
 	sizeof(ulong), 0644, NULL, &xfs_refcache_resize_proc_handler,
 	&sysctl_intvec, NULL, &xfs_min[0], &xfs_max[0]},
@@ -108,15 +115,27 @@ static ctl_table xfs_table[] = {
 	sizeof(ulong), 0644, NULL, &xfs_stats_clear_proc_handler,
 	&sysctl_intvec, NULL, &xfs_min[2], &xfs_max[2]},
 
+	{XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown,
+	sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax,
+	&sysctl_intvec, NULL, &xfs_min[3], &xfs_max[3]},
+
+	{XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit,
+	sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax,
+	&sysctl_intvec, NULL, &xfs_min[4], &xfs_max[4]},
+
+	{XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode,
+	sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax,
+	&sysctl_intvec, NULL, &xfs_min[5], &xfs_max[5]},
+
 	{0}
 };
 
-static ctl_table xfs_dir_table[] = {
+STATIC ctl_table xfs_dir_table[] = {
 	{FS_XFS, "xfs", NULL, 0, 0555, xfs_table},
 	{0}
 };
 
-static ctl_table xfs_root_table[] = {
+STATIC ctl_table xfs_root_table[] = {
 	{CTL_FS, "fs",	NULL, 0, 0555, xfs_dir_table},
 	{0}
 };
diff --git a/fs/xfs/linux/xfs_sysctl.h b/fs/xfs/linux/xfs_sysctl.h
index 6649017ec372..4bf5749d1827 100644
--- a/fs/xfs/linux/xfs_sysctl.h
+++ b/fs/xfs/linux/xfs_sysctl.h
@@ -39,18 +39,25 @@
  * Tunable xfs parameters
  */
 
-#define XFS_PARAM	3
+#define XFS_PARAM	(sizeof(struct xfs_param) / sizeof(ulong))
 
 typedef struct xfs_param {
-	ulong	refcache_size;	/* Size of nfs refcache */
-	ulong	refcache_purge; /* # of entries to purge each time */
-	ulong	stats_clear;	/* reset all xfs stats to 0 */
+	ulong	refcache_size;	/* Size of NFS reference cache.          */
+	ulong	refcache_purge;	/* # of entries to purge each time.      */
+	ulong	stats_clear;	/* Reset all XFS statistics to zero.     */
+	ulong	restrict_chown;	/* Root/non-root can give away files.    */
+	ulong	sgid_inherit;	/* Inherit ISGID bit if process' GID is  */
+				/*  not a member of the parent dir GID.  */
+	ulong	symlink_mode;	/* Symlink creat mode affected by umask. */
 } xfs_param_t;
 
 enum {
 	XFS_REFCACHE_SIZE = 1,
 	XFS_REFCACHE_PURGE = 2,
 	XFS_STATS_CLEAR = 3,
+	XFS_RESTRICT_CHOWN = 4,
+	XFS_SGID_INHERIT = 5,
+	XFS_SYMLINK_MODE = 6,
 };
 
 extern xfs_param_t	xfs_params;
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 14195151eeea..05df613952c6 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -119,8 +119,7 @@ struct xfs_mount_args {
 #define XFSMNT_NOUUID		0x01000000	/* Ignore fs uuid */
 #define XFSMNT_32BITINODES	0x02000000	/* restrict inodes to 32
 						 * bits of address space */
-#define XFSMNT_IRIXSGID		0x04000000	/* Irix-style sgid inheritance */
-#define XFSMNT_NOLOGFLUSH	0x08000000	/* Don't flush for log blocks */
+#define XFSMNT_NOLOGFLUSH	0x04000000	/* Don't flush for log blocks */
 
 /* Did we get any args for CXFS to consume? */
 #define XFSARGS_FOR_CXFSARR(ap)		\
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 35506332b70e..66b1b351c3e0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1097,14 +1097,13 @@ xfs_ialloc(
 
 	/*
 	 * If the group ID of the new file does not match the effective group
-	 * ID or one of the supplementary group IDs, the ISGID bit is
-	 * cleared if the "irixsgid" mount option is set.
+	 * ID or one of the supplementary group IDs, the ISGID bit is cleared,
+	 * but only if the irix_sgid_inherit compatibility variable is set.
 	 */
-	if (ip->i_d.di_mode & ISGID) {
-		if (!in_group_p((gid_t)ip->i_d.di_gid)
-		    && (ip->i_mount->m_flags & XFS_MOUNT_IRIXSGID)) {
-			ip->i_d.di_mode &= ~ISGID;
-		}
+	if ((irix_sgid_inherit) &&
+	    (ip->i_d.di_mode & ISGID) &&
+	    (!in_group_p((gid_t)ip->i_d.di_gid))) {
+		ip->i_d.di_mode &= ~ISGID;
 	}
 
 	ip->i_d.di_size = 0;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index e3c2977ef18c..38e5bca7b35f 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -119,23 +119,6 @@ typedef struct xfs_gap {
 	xfs_extlen_t	xg_count_fsb;
 } xfs_gap_t;
 
-/*
- * This structure is used to hold common pieces of the buffer
- * and file for xfs_dio_write and xfs_dio_read.
- */
-typedef struct xfs_dio {
-	struct xfs_buf	*xd_bp;
-	bhv_desc_t	*xd_bdp;
-	struct xfs_inode *xd_ip;
-	struct xfs_iocore *xd_io;
-	struct cred	*xd_cr;
-	struct pm	*xd_pmp;
-	int		xd_blkalgn;
-	int		xd_ioflag;
-	xfs_off_t	xd_start;
-	size_t		xd_length;
-} xfs_dio_t;
-
 typedef struct dm_attrs_s {
 	__uint32_t	da_dmevmask;	/* DMIG event mask */
 	__uint16_t	da_dmstate;	/* DMIG state info */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a5b4ec193617..7011e001b6c3 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -77,7 +77,6 @@ struct xfs_inode;
 struct xfs_perag;
 struct xfs_quotainfo;
 struct xfs_iocore;
-struct xfs_dio;
 struct xfs_bmbt_irec;
 struct xfs_bmap_free;
 
@@ -96,24 +95,18 @@ struct xfs_bmap_free;
  * minimize the number of memory indirections involved.
  */
 
-typedef int		(*xfs_dio_write_t)(struct xfs_dio *);
-typedef int		(*xfs_dio_read_t)(struct xfs_dio *);
-typedef int		(*xfs_strat_write_t)(struct xfs_iocore *, struct xfs_buf *);
 typedef int		(*xfs_bmapi_t)(struct xfs_trans *, void *,
 				xfs_fileoff_t, xfs_filblks_t, int,
 				xfs_fsblock_t *, xfs_extlen_t,
 				struct xfs_bmbt_irec *, int *,
 				struct xfs_bmap_free *);
 typedef int		(*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *);
-typedef int		(*xfs_rsync_t)(void *, int, xfs_off_t, xfs_off_t);
-typedef uint		(*xfs_lck_map_shared_t)(void *);
 typedef void		(*xfs_lock_t)(void *, uint);
 typedef void		(*xfs_lock_demote_t)(void *, uint);
 typedef int		(*xfs_lock_nowait_t)(void *, uint);
 typedef void		(*xfs_unlk_t)(void *, unsigned int);
 typedef void		(*xfs_chgtime_t)(void *, int);
 typedef xfs_fsize_t	(*xfs_size_t)(void *);
-typedef xfs_fsize_t	(*xfs_setsize_t)(void *, xfs_off_t);
 typedef xfs_fsize_t	(*xfs_lastbyte_t)(void *);
 
 typedef struct xfs_ioops {
@@ -295,8 +288,7 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_NOUUID	0x00004000	/* ignore uuid during mount */
 #define XFS_MOUNT_32BITINODES	0x00008000	/* do not create inodes above
 						 * 32 bits in size */
-#define XFS_MOUNT_IRIXSGID	0x00010000	/* Irix-style sgid inheritance */
-#define XFS_MOUNT_NOLOGFLUSH	0x00020000
+#define XFS_MOUNT_NOLOGFLUSH	0x00010000
 
 /*
  * Flags for m_cxfstype
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index f2f4c5d88738..3b76892df952 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -42,7 +42,6 @@ struct xfs_inode;
 struct xfs_iocore;
 struct xfs_mount;
 struct xfs_trans;
-struct xfs_dio;
 struct pm;
 
 /*
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 263ee3310b50..3604040fe8bf 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -278,9 +278,6 @@ xfs_start_flags(
 	if (1 || ap->flags & XFSMNT_32BITINODES)
 		mp->m_flags |= XFS_MOUNT_32BITINODES;
 
-	if (ap->flags & XFSMNT_IRIXSGID)
-		mp->m_flags |= XFS_MOUNT_IRIXSGID;
-
 	if (ap->flags & XFSMNT_IOSIZE) {
 		if (ap->iosizelog > XFS_MAX_IO_LOG ||
 		    ap->iosizelog < XFS_MIN_IO_LOG) {
diff --git a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
index 579eedcef1f3..3f7ffef71667 100644
--- a/fs/xfs/xfsidbg.c
+++ b/fs/xfs/xfsidbg.c
@@ -4380,8 +4380,7 @@ xfsidbg_xmount(xfs_mount_t *mp)
 		"OSYNC",	/* 0x2000 */
 		"NOUUID",	/* 0x4000 */
 		"32BIT",	/* 0x8000 */
-		"IRIXSGID",	/* 0x10000 */
-		"NOLOGFLUSH",	/* 0x20000 */
+		"NOLOGFLUSH",	/* 0x10000 */
 		0
 	};
 
-- 
cgit v1.2.3


From d1ae67a2452b1a880ad72b8c156f83581fa9c89f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 02:59:02 +0200
Subject: XFS: Remove struct pm entirely - it was never defined in the Linux
 port.

Modid: 2.5.x-xfs:slinx:129236a
---
 fs/xfs/linux/xfs_lrw.c | 26 +++++++++++---------------
 fs/xfs/linux/xfs_lrw.h |  2 +-
 fs/xfs/xfs_inode.c     |  2 +-
 fs/xfs/xfs_inode.h     |  1 -
 fs/xfs/xfs_rw.c        |  2 +-
 fs/xfs/xfs_rw.h        |  1 -
 6 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_lrw.c b/fs/xfs/linux/xfs_lrw.c
index a96d71575fa1..05a4b4e84b4c 100644
--- a/fs/xfs/linux/xfs_lrw.c
+++ b/fs/xfs/linux/xfs_lrw.c
@@ -44,9 +44,9 @@
 #define XFS_STRAT_WRITE_IMAPS	2
 
 STATIC int xfs_iomap_read(xfs_iocore_t *, loff_t, size_t, int, pb_bmap_t *,
-			int *, struct pm *);
+			int *);
 STATIC int xfs_iomap_write(xfs_iocore_t *, loff_t, size_t, pb_bmap_t *,
-			int *, int, struct pm *);
+			int *, int);
 STATIC int xfs_iomap_write_delay(xfs_iocore_t *, loff_t, size_t, pb_bmap_t *,
 			int *, int, int);
 STATIC int xfs_iomap_write_direct(xfs_iocore_t *, loff_t, size_t, pb_bmap_t *,
@@ -233,8 +233,7 @@ xfs_zero_last_block(
 	xfs_iocore_t	*io,
 	xfs_off_t	offset,
 	xfs_fsize_t	isize,
-	xfs_fsize_t	end_size,
-	struct pm	*pmp)
+	xfs_fsize_t	end_size)
 {
 	xfs_fileoff_t	last_fsb;
 	xfs_mount_t	*mp;
@@ -313,8 +312,7 @@ xfs_zero_eof(
 	xfs_iocore_t	*io,
 	xfs_off_t	offset,		/* starting I/O offset */
 	xfs_fsize_t	isize,		/* current inode size */
-	xfs_fsize_t	end_size,	/* terminal inode size */
-	struct pm	*pmp)
+	xfs_fsize_t	end_size)	/* terminal inode size */
 {
 	struct inode	*ip = LINVFS_GET_IP(vp);
 	xfs_fileoff_t	start_zero_fsb;
@@ -340,7 +338,7 @@ xfs_zero_eof(
 	 * First handle zeroing the block on which isize resides.
 	 * We only zero a part of that block so it is handled specially.
 	 */
-	error = xfs_zero_last_block(ip, io, offset, isize, end_size, pmp);
+	error = xfs_zero_last_block(ip, io, offset, isize, end_size);
 	if (error) {
 		ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
 		ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -596,7 +594,7 @@ start:
 
 	if (!direct && (*offset > isize && isize)) {
 		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset,
-			isize, *offset + size, NULL);
+			isize, *offset + size);
 		if (error) {
 			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
 			return(-error);
@@ -790,7 +788,7 @@ xfs_bmap(bhv_desc_t	*bdp,
 	if (flags & PBF_READ) {
 		lockmode = xfs_ilock_map_shared(ip);
 		error = xfs_iomap_read(&ip->i_iocore, offset, count,
-				 XFS_BMAPI_ENTIRE, pbmapp, npbmaps, NULL);
+				 XFS_BMAPI_ENTIRE, pbmapp, npbmaps);
 		xfs_iunlock_map_shared(ip, lockmode);
 	} else { /* PBF_WRITE */
 		ASSERT(flags & PBF_WRITE);
@@ -812,7 +810,7 @@ xfs_bmap(bhv_desc_t	*bdp,
 		}
 retry:
 		error = xfs_iomap_write(&ip->i_iocore, offset, count,
-					pbmapp, npbmaps, flags, NULL);
+					pbmapp, npbmaps, flags);
 		/* xfs_iomap_write unlocks/locks/unlocks */
 
 		if (error == ENOSPC) {
@@ -1145,8 +1143,7 @@ xfs_iomap_read(
 	size_t		count,
 	int		flags,
 	pb_bmap_t	*pbmapp,
-	int		*npbmaps,
-	struct pm	*pmp)
+	int		*npbmaps)
 {
 	xfs_fileoff_t	offset_fsb;
 	xfs_fileoff_t	end_fsb;
@@ -1199,8 +1196,7 @@ xfs_iomap_write(
 	size_t		count,
 	pb_bmap_t	*pbmapp,
 	int		*npbmaps,
-	int		ioflag,
-	struct pm	*pmp)
+	int		ioflag)
 {
 	int		maps;
 	int		error = 0;
@@ -1217,7 +1213,7 @@ xfs_iomap_write(
 	 */
 
 	found = 0;
-	error = xfs_iomap_read(io, offset, count, flags, pbmapp, npbmaps, NULL);
+	error = xfs_iomap_read(io, offset, count, flags, pbmapp, npbmaps);
 	if (error)
 		goto out;
 
diff --git a/fs/xfs/linux/xfs_lrw.h b/fs/xfs/linux/xfs_lrw.h
index 6f2ef2d4fa2a..4d1967353a44 100644
--- a/fs/xfs/linux/xfs_lrw.h
+++ b/fs/xfs/linux/xfs_lrw.h
@@ -45,7 +45,7 @@ extern int xfsbdstrat (struct xfs_mount *, struct xfs_buf *);
 extern int xfs_bdstrat_cb (struct xfs_buf *);
 
 extern int xfs_zero_eof (vnode_t *, struct xfs_iocore *, xfs_off_t,
-				xfs_fsize_t, xfs_fsize_t, struct pm *);
+				xfs_fsize_t, xfs_fsize_t);
 extern ssize_t xfs_read (
 	struct bhv_desc		*bdp,
 	struct file		*filp,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 66b1b351c3e0..49d98172ec1a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1714,7 +1714,7 @@ xfs_igrow_start(
 	 * and any blocks between the old and new file sizes.
 	 */
 	error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize,
-				new_size, NULL);
+				new_size);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 38e5bca7b35f..ea739492d91e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -98,7 +98,6 @@ struct xfs_inode_log_item;
 struct xfs_mount;
 struct xfs_trans;
 struct xfs_dquot;
-struct pm;
 
 
 /*
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 0076a84485e8..35978e54d75d 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -362,7 +362,7 @@ xfs_inval_cached_pages(
 		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
 		isize = XFS_SIZE(mp, io);
 		if (offset > isize) {
-			xfs_zero_eof(vp, io, offset, isize, offset, NULL);
+			xfs_zero_eof(vp, io, offset, isize, offset);
 		}
 		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
 	}
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index 3b76892df952..96bf21108a24 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -42,7 +42,6 @@ struct xfs_inode;
 struct xfs_iocore;
 struct xfs_mount;
 struct xfs_trans;
-struct pm;
 
 /*
  * Maximum count of bmaps used by read and write paths.
-- 
cgit v1.2.3


From ca92362fcd2a34a70582d0299f90f189b30924e0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 02:59:56 +0200
Subject: XFS: Don't update i_rdev and i_generation in vn_revalidate.

Modid: 2.5.x-xfs:slinx:129315a
---
 fs/xfs/linux/xfs_vnode.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_vnode.c b/fs/xfs/linux/xfs_vnode.c
index 9689236c6ae5..95386e4a9729 100644
--- a/fs/xfs/linux/xfs_vnode.c
+++ b/fs/xfs/linux/xfs_vnode.c
@@ -225,8 +225,6 @@ vn_revalidate(struct vnode *vp)
 		inode->i_nlink	    = va.va_nlink;
 		inode->i_uid	    = va.va_uid;
 		inode->i_gid	    = va.va_gid;
-		inode->i_rdev	    = XFS_DEV_TO_KDEVT(va.va_rdev);
-		inode->i_generation = va.va_gencount;
 		inode->i_size	    = va.va_size;
 		inode->i_blocks	    = va.va_nblocks;
 		inode->i_mtime	    = va.va_mtime.tv_sec;
-- 
cgit v1.2.3


From 09ab13f9ba06bafb233cc5e1ceb19bacd286b540 Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 03:00:58 +0200
Subject: XFS: ensure inode size is correct after making a symlink.

Modid: 2.5.x-xfs:slinx:129340a
---
 fs/xfs/linux/xfs_iops.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c
index 9b46e62dbc14..20ac2a8e83cb 100644
--- a/fs/xfs/linux/xfs_iops.c
+++ b/fs/xfs/linux/xfs_iops.c
@@ -276,6 +276,7 @@ linvfs_symlink(
 		} else {
 			d_instantiate(dentry, ip);
 			validate_fields(dir);
+			validate_fields(ip); /* size needs update */
 			mark_inode_dirty_sync(ip);
 			mark_inode_dirty_sync(dir);
 		}
-- 
cgit v1.2.3


From a46021bb58f47f6cba299041f610fcaa1497ebcb Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 15 Oct 2002 03:06:39 +0200
Subject: XFS: Global search and replace of the b* memory routines to their
 mem* equivalents.

Modid: 2.5.x-xfs:slinx:129466a
---
 fs/xfs/linux/xfs_ioctl.c  |  4 +--
 fs/xfs/linux/xfs_iops.c   |  4 +--
 fs/xfs/support/move.c     |  4 +--
 fs/xfs/support/move.h     |  5 ---
 fs/xfs/support/uuid.c     |  4 +--
 fs/xfs/xfs_alloc_btree.c  | 36 +++++++++----------
 fs/xfs/xfs_arch.h         |  4 +--
 fs/xfs/xfs_attr.c         |  6 ++--
 fs/xfs/xfs_attr_fetch.c   |  2 +-
 fs/xfs/xfs_attr_leaf.c    | 92 +++++++++++++++++++++++------------------------
 fs/xfs/xfs_bmap.c         | 12 +++----
 fs/xfs/xfs_bmap_btree.c   | 52 +++++++++++++--------------
 fs/xfs/xfs_buf_item.c     |  2 +-
 fs/xfs/xfs_da_btree.c     | 40 ++++++++++-----------
 fs/xfs/xfs_dir.c          |  2 +-
 fs/xfs/xfs_dir2.c         |  6 ++--
 fs/xfs/xfs_dir2_block.c   | 14 ++++----
 fs/xfs/xfs_dir2_data.c    |  2 +-
 fs/xfs/xfs_dir2_leaf.c    | 30 ++++++++--------
 fs/xfs/xfs_dir2_node.c    | 24 ++++++-------
 fs/xfs/xfs_dir2_sf.c      | 32 ++++++++---------
 fs/xfs/xfs_dir2_sf.h      |  2 +-
 fs/xfs/xfs_dir2_trace.c   |  4 +--
 fs/xfs/xfs_dir_leaf.c     | 72 ++++++++++++++++++-------------------
 fs/xfs/xfs_dir_sf.h       |  2 +-
 fs/xfs/xfs_dquot.c        | 14 ++++----
 fs/xfs/xfs_error.c        | 14 ++++----
 fs/xfs/xfs_fs.h           |  4 +--
 fs/xfs/xfs_fsops.c        | 12 +++----
 fs/xfs/xfs_ialloc.c       |  4 +--
 fs/xfs/xfs_ialloc_btree.c | 36 +++++++++----------
 fs/xfs/xfs_inode.c        | 55 ++++++++++++++--------------
 fs/xfs/xfs_inode_item.c   |  6 ++--
 fs/xfs/xfs_itable.c       |  2 +-
 fs/xfs/xfs_log.c          | 16 ++++-----
 fs/xfs/xfs_log_recover.c  | 31 ++++++++--------
 fs/xfs/xfs_mount.c        |  8 ++---
 fs/xfs/xfs_qm.c           |  8 ++---
 fs/xfs/xfs_qm_syscalls.c  |  4 +--
 fs/xfs/xfs_rtalloc.c      |  2 +-
 fs/xfs/xfs_trans_buf.c    |  2 +-
 fs/xfs/xfs_trans_inode.c  | 14 +-------
 fs/xfs/xfs_utils.c        |  2 +-
 fs/xfs/xfs_vnodeops.c     | 12 +++----
 44 files changed, 344 insertions(+), 359 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_ioctl.c b/fs/xfs/linux/xfs_ioctl.c
index 1a5a26bbc217..03451043e983 100644
--- a/fs/xfs/linux/xfs_ioctl.c
+++ b/fs/xfs/linux/xfs_ioctl.c
@@ -67,7 +67,7 @@ xfs_find_handle(
 	if (copy_from_user(&hreq, (xfs_fsop_handlereq_t *)arg, sizeof(hreq)))
 		return -XFS_ERROR(EFAULT);
 
-	bzero((char *)&handle, sizeof(handle));
+	memset((char *)&handle, 0, sizeof(handle));
 
 	switch (cmd) {
 	case XFS_IOC_PATH_TO_FSHANDLE:
@@ -228,7 +228,7 @@ xfs_vget_fsop_handlereq(
 	if (copy_from_user(handlep, hanp, hlen))
 		return XFS_ERROR(EFAULT);
 	if (hlen < sizeof(*handlep))
-		bzero(((char *)handlep) + hlen, sizeof(*handlep) - hlen);
+		memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
 	if (hlen > sizeof(handlep->ha_fsid)) {
 		if (handlep->ha_fid.xfs_fid_len !=
 				(hlen - sizeof(handlep->ha_fsid)
diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c
index 20ac2a8e83cb..d2ca5a30238a 100644
--- a/fs/xfs/linux/xfs_iops.c
+++ b/fs/xfs/linux/xfs_iops.c
@@ -91,7 +91,7 @@ linvfs_mknod(
 		mode &= ~current->fs->umask;
 #endif
 
-	bzero(&va, sizeof(va));
+	memset(&va, 0, sizeof(va));
 	va.va_mask = AT_TYPE|AT_MODE;
 	va.va_type = IFTOVT(mode);
 	va.va_mode = mode;
@@ -259,7 +259,7 @@ linvfs_symlink(
 
 	dvp = LINVFS_GET_VP(dir);
 
-	bzero(&va, sizeof(va));
+	memset(&va, 0, sizeof(va));
 	va.va_type = VLNK;
 	va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO;
 	va.va_mask = AT_TYPE|AT_MODE;
diff --git a/fs/xfs/support/move.c b/fs/xfs/support/move.c
index 4fc3831eed38..15dbd090c6b8 100644
--- a/fs/xfs/support/move.c
+++ b/fs/xfs/support/move.c
@@ -72,9 +72,9 @@ uiomove(void *cp, size_t n, enum uio_rw rw, struct uio *uio)
 
 		case UIO_SYSSPACE:
 			if (rw == UIO_READ)
-				bcopy(cp, iov->iov_base, cnt);
+				memcpy(iov->iov_base, cp, cnt);
 			else
-				bcopy(iov->iov_base, cp, cnt);
+				memcpy(cp, iov->iov_base, cnt);
 			break;
 
 		default:
diff --git a/fs/xfs/support/move.h b/fs/xfs/support/move.h
index e01b7b6c7a15..dd63285df2e6 100644
--- a/fs/xfs/support/move.h
+++ b/fs/xfs/support/move.h
@@ -36,11 +36,6 @@
 #include <linux/uio.h>
 #include <asm/uaccess.h>
 
-#define bzero(p,s)	memset((p), 0, (s))
-#define bcopy(s,d,n)	memcpy((d),(s),(n))
-#define bcmp(s1,s2,l)	memcmp(s1,s2,l)
-#define ovbcopy(from,to,count)	memmove(to,from,count)
-
 typedef struct iovec iovec_t;
 
 typedef struct uio {
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index f30b857a3f9a..fd98101312c1 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -109,7 +109,7 @@ uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
 void
 uuid_create_nil(uuid_t *uuid)
 {
-	bzero(uuid, sizeof *uuid);
+	memset(uuid, 0, sizeof(*uuid));
 }
 
 int
@@ -129,7 +129,7 @@ uuid_is_nil(uuid_t *uuid)
 int
 uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
 {
-	return bcmp(uuid1, uuid2, sizeof(uuid_t)) ? B_FALSE : B_TRUE;
+	return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? B_FALSE : B_TRUE;
 }
 
 /*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 11c7618be7c7..9bb3fe79243d 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -132,9 +132,9 @@ xfs_alloc_delrec(
 		}
 #endif
 		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
-			ovbcopy(&lkp[ptr], &lkp[ptr - 1],
+			memmove(&lkp[ptr - 1], &lkp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lkp)); /* INT_: mem copy */
-			ovbcopy(&lpp[ptr], &lpp[ptr - 1],
+			memmove(&lpp[ptr - 1], &lpp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lpp)); /* INT_: mem copy */
 			xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
 			xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
@@ -147,7 +147,7 @@ xfs_alloc_delrec(
 	else {
 		lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
 		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
-			ovbcopy(&lrp[ptr], &lrp[ptr - 1],
+			memmove(&lrp[ptr - 1], &lrp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lrp));
 			xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
 		}
@@ -464,8 +464,8 @@ xfs_alloc_delrec(
 				return error;
 		}
 #endif
-		bcopy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); /* INT_: structure copy */
-		bcopy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); /* INT_: structure copy */
+		memcpy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); /* INT_: structure copy */
+		memcpy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); /* INT_: structure copy */
 		xfs_alloc_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
 				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_alloc_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
@@ -476,7 +476,7 @@ xfs_alloc_delrec(
 		 */
 		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
 		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		bcopy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
+		memcpy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
 		xfs_alloc_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
 				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
 	}
@@ -697,9 +697,9 @@ xfs_alloc_insrec(
 				return error;
 		}
 #endif
-		ovbcopy(&kp[ptr - 1], &kp[ptr],
+		memmove(&kp[ptr], &kp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp)); /* INT_: copy */
-		ovbcopy(&pp[ptr - 1], &pp[ptr],
+		memmove(&pp[ptr], &pp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp)); /* INT_: copy */
 #ifdef DEBUG
 		if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
@@ -723,7 +723,7 @@ xfs_alloc_insrec(
 		 * It's a leaf entry.  Make a hole for the new record.
 		 */
 		rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
-		ovbcopy(&rp[ptr - 1], &rp[ptr],
+		memmove(&rp[ptr], &rp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
 		/*
 		 * Now stuff the new record in, bump numrecs
@@ -1217,12 +1217,12 @@ xfs_alloc_lshift(
 				return error;
 		}
 #endif
-		ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memmove(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 	} else {
-		ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
 		key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
@@ -1475,8 +1475,8 @@ xfs_alloc_rshift(
 				return error;
 		}
 #endif
-		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 #ifdef DEBUG
 		if ((error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)))
 			return error;
@@ -1492,7 +1492,7 @@ xfs_alloc_rshift(
 
 		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
 		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		*rrp = *lrp;
 		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
 		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
@@ -1608,8 +1608,8 @@ xfs_alloc_split(
 				return error;
 		}
 #endif
-		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); /* INT_: copy */
-		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));/* INT_: copy */
+		memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); /* INT_: copy */
+		memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp)); /* INT_: copy */
 		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		*keyp = *rkp;
@@ -1623,7 +1623,7 @@ xfs_alloc_split(
 
 		lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
 		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		keyp->ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
 		keyp->ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 57a28544be96..4629bc745e07 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -260,13 +260,13 @@
 
 #define DIRINO_COPY_ARCH(from,to,arch) \
     if ((arch) == ARCH_NOCONVERT) { \
-	bcopy(from,to,sizeof(xfs_ino_t)); \
+	memcpy(to,from,sizeof(xfs_ino_t)); \
     } else { \
 	INT_SWAP_UNALIGNED_64(from,to); \
     }
 #define DIRINO4_COPY_ARCH(from,to,arch) \
     if ((arch) == ARCH_NOCONVERT) { \
-	bcopy((((__u8*)from+4)),to,sizeof(xfs_dir2_ino4_t)); \
+	memcpy(to,(((__u8*)from+4)),sizeof(xfs_dir2_ino4_t)); \
     } else { \
 	INT_SWAP_UNALIGNED_32(from,to); \
     }
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 74563b62d3fc..482a20fcbfde 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -120,7 +120,7 @@ xfs_attr_get(bhv_desc_t *bdp, char *name, char *value, int *valuelenp,
 	/*
 	 * Fill in the arg structure for this request.
 	 */
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.name = name;
 	args.namelen = namelen;
 	args.value = value;
@@ -215,7 +215,7 @@ xfs_attr_set(bhv_desc_t *bdp, char *name, char *value, int valuelen, int flags,
 	/*
 	 * Fill in the arg structure for this request.
 	 */
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.name = name;
 	args.namelen = namelen;
 	args.value = value;
@@ -469,7 +469,7 @@ xfs_attr_remove(bhv_desc_t *bdp, char *name, int flags, struct cred *cred)
 	/*
 	 * Fill in the arg structure for this request.
 	 */
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.name = name;
 	args.namelen = namelen;
 	args.flags = flags;
diff --git a/fs/xfs/xfs_attr_fetch.c b/fs/xfs/xfs_attr_fetch.c
index 0c9af54eeed5..4b1a23cb21a6 100644
--- a/fs/xfs/xfs_attr_fetch.c
+++ b/fs/xfs/xfs_attr_fetch.c
@@ -43,7 +43,7 @@ xfs_attr_fetch(xfs_inode_t *ip, char *name, char *value, int valuelen)
 	/*
 	 * Do the argument setup for the xfs_attr routines.
 	 */
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.dp = ip;
 	args.flags = ATTR_ROOT;
 	args.whichfork = XFS_ATTR_FORK;
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 884da53fa54d..b1c4836d6709 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -128,7 +128,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args)
 				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
 		if (sfe->namelen != args->namelen)
 			continue;
-		if (bcmp(args->name, sfe->nameval, args->namelen) != 0)
+		if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
 			continue;
 		if (((args->flags & ATTR_ROOT) != 0) !=
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
@@ -145,8 +145,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args)
 	sfe->namelen = args->namelen;
 	INT_SET(sfe->valuelen, ARCH_CONVERT, args->valuelen);
 	sfe->flags = (args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0;
-	bcopy(args->name, sfe->nameval, args->namelen);
-	bcopy(args->value, &sfe->nameval[args->namelen], args->valuelen);
+	memcpy(sfe->nameval, args->name, args->namelen);
+	memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen);
 	INT_MOD(sf->hdr.count, ARCH_CONVERT, 1);
 	INT_MOD(sf->hdr.totsize, ARCH_CONVERT, size);
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
@@ -178,7 +178,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 		size = XFS_ATTR_SF_ENTSIZE(sfe);
 		if (sfe->namelen != args->namelen)
 			continue;
-		if (bcmp(sfe->nameval, args->name, args->namelen) != 0)
+		if (memcmp(sfe->nameval, args->name, args->namelen) != 0)
 			continue;
 		if (((args->flags & ATTR_ROOT) != 0) !=
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
@@ -191,7 +191,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	end = base + size;
 	totsize = INT_GET(sf->hdr.totsize, ARCH_CONVERT);
 	if (end != totsize) {
-		ovbcopy(&((char *)sf)[end], &((char *)sf)[base],
+		memmove(&((char *)sf)[base], &((char *)sf)[end],
 							totsize - end);
 	}
 	INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
@@ -222,7 +222,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
 				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
 		if (sfe->namelen != args->namelen)
 			continue;
-		if (bcmp(args->name, sfe->nameval, args->namelen) != 0)
+		if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
 			continue;
 		if (((args->flags & ATTR_ROOT) != 0) !=
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
@@ -250,7 +250,7 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
 		if (sfe->namelen != args->namelen)
 			continue;
-		if (bcmp(args->name, sfe->nameval, args->namelen) != 0)
+		if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
 			continue;
 		if (((args->flags & ATTR_ROOT) != 0) !=
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
@@ -264,7 +264,7 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 			return(XFS_ERROR(ERANGE));
 		}
 		args->valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT);
-		bcopy(&sfe->nameval[args->namelen], args->value,
+		memcpy(args->value, &sfe->nameval[args->namelen],
 						    args->valuelen);
 		return(XFS_ERROR(EEXIST));
 	}
@@ -293,7 +293,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 	size = INT_GET(sf->hdr.totsize, ARCH_CONVERT);
 	tmpbuffer = kmem_alloc(size, KM_SLEEP);
 	ASSERT(tmpbuffer != NULL);
-	bcopy(ifp->if_u1.if_data, tmpbuffer, size);
+	memcpy(tmpbuffer, ifp->if_u1.if_data, size);
 	sf = (xfs_attr_shortform_t *)tmpbuffer;
 
 	xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
@@ -307,7 +307,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 		if (error == EIO)
 			goto out;
 		xfs_idata_realloc(dp, size, XFS_ATTR_FORK);	/* try to put */
-		bcopy(tmpbuffer, ifp->if_u1.if_data, size);	/* it back */
+		memcpy(ifp->if_u1.if_data, tmpbuffer, size);	/* it back */
 		goto out;
 	}
 
@@ -319,11 +319,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 		if (error)
 			goto out;
 		xfs_idata_realloc(dp, size, XFS_ATTR_FORK);	/* try to put */
-		bcopy(tmpbuffer, ifp->if_u1.if_data, size);	/* it back */
+		memcpy(ifp->if_u1.if_data, tmpbuffer, size);	/* it back */
 		goto out;
 	}
 
-	bzero((char *)&nargs, sizeof(nargs));
+	memset((char *)&nargs, 0, sizeof(nargs));
 	nargs.dp = dp;
 	nargs.firstblock = args->firstblock;
 	nargs.flist = args->flist;
@@ -590,11 +590,11 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	ASSERT(tmpbuffer != NULL);
 
 	ASSERT(bp != NULL);
-	bcopy(bp->data, tmpbuffer, XFS_LBSIZE(dp->i_mount));
+	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
 	leaf = (xfs_attr_leafblock_t *)tmpbuffer;
 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
 						== XFS_ATTR_LEAF_MAGIC);
-	bzero(bp->data, XFS_LBSIZE(dp->i_mount));
+	memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
 
 	/*
 	 * Clean out the prior contents of the attribute list.
@@ -609,7 +609,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	/*
 	 * Copy the attributes
 	 */
-	bzero((char *)&nargs, sizeof(nargs));
+	memset((char *)&nargs, 0, sizeof(nargs));
 	nargs.dp = dp;
 	nargs.firstblock = args->firstblock;
 	nargs.flist = args->flist;
@@ -669,7 +669,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
 	if (error)
 		goto out;
 	ASSERT(bp2 != NULL);
-	bcopy(bp1->data, bp2->data, XFS_LBSIZE(dp->i_mount));
+	memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
 	xfs_da_buf_done(bp1);
 	bp1 = NULL;
 	xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
@@ -725,7 +725,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 		return(error);
 	ASSERT(bp != NULL);
 	leaf = bp->data;
-	bzero((char *)leaf, XFS_LBSIZE(dp->i_mount));
+	memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
 	hdr = &leaf->hdr;
 	INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_ATTR_LEAF_MAGIC);
 	INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
@@ -900,7 +900,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	if (args->index < INT_GET(hdr->count, ARCH_CONVERT)) {
 		tmp  = INT_GET(hdr->count, ARCH_CONVERT) - args->index;
 		tmp *= sizeof(xfs_attr_leaf_entry_t);
-		ovbcopy((char *)entry, (char *)(entry+1), tmp);
+		memmove((char *)(entry+1), (char *)entry, tmp);
 		xfs_da_log_buf(args->trans, bp,
 		    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
 	}
@@ -955,13 +955,13 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 		name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index);
 		name_loc->namelen = args->namelen;
 		INT_SET(name_loc->valuelen, ARCH_CONVERT, args->valuelen);
-		bcopy(args->name, (char *)name_loc->nameval, args->namelen);
-		bcopy(args->value, (char *)&name_loc->nameval[args->namelen],
+		memcpy((char *)name_loc->nameval, args->name, args->namelen);
+		memcpy((char *)&name_loc->nameval[args->namelen], args->value,
 				   INT_GET(name_loc->valuelen, ARCH_CONVERT));
 	} else {
 		name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index);
 		name_rmt->namelen = args->namelen;
-		bcopy(args->name, (char *)name_rmt->name, args->namelen);
+		memcpy((char *)name_rmt->name, args->name, args->namelen);
 		entry->flags |= XFS_ATTR_INCOMPLETE;
 		/* just in case */
 		INT_ZERO(name_rmt->valuelen, ARCH_CONVERT);
@@ -1017,8 +1017,8 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
 	mp = trans->t_mountp;
 	tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
 	ASSERT(tmpbuffer != NULL);
-	bcopy(bp->data, tmpbuffer, XFS_LBSIZE(mp));
-	bzero(bp->data, XFS_LBSIZE(mp));
+	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(mp));
+	memset(bp->data, 0, XFS_LBSIZE(mp));
 
 	/*
 	 * Copy basic information
@@ -1390,7 +1390,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 		 * path point to the block we want to drop (this one).
 		 */
 		forward = (!INT_ISZERO(info->forw, ARCH_CONVERT));
-		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
 		if (error)
@@ -1450,7 +1450,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 	 * Make altpath point to the block we want to keep (the lower
 	 * numbered block) and path point to the block we want to drop.
 	 */
-	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	memcpy(&state->altpath, &state->path, sizeof(state->path));
 	if (blkno < blk->blkno) {
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
@@ -1585,7 +1585,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	/*
 	 * Compress the remaining entries and zero out the removed stuff.
 	 */
-	bzero(XFS_ATTR_LEAF_NAME(leaf, args->index), entsize);
+	memset(XFS_ATTR_LEAF_NAME(leaf, args->index), 0, entsize);
 	INT_MOD(hdr->usedbytes, ARCH_CONVERT, -entsize);
 	xfs_da_log_buf(args->trans, bp,
 	     XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index),
@@ -1593,12 +1593,12 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 
 	tmp = (INT_GET(hdr->count, ARCH_CONVERT) - args->index)
 					* sizeof(xfs_attr_leaf_entry_t);
-	ovbcopy((char *)(entry+1), (char *)entry, tmp);
+	memmove((char *)entry, (char *)(entry+1), tmp);
 	INT_MOD(hdr->count, ARCH_CONVERT, -1);
 	xfs_da_log_buf(args->trans, bp,
 	    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
 	entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)];
-	bzero((char *)entry, sizeof(xfs_attr_leaf_entry_t));
+	memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t));
 
 	/*
 	 * If we removed the first entry, re-find the first used byte
@@ -1701,7 +1701,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 		 */
 		tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
 		ASSERT(tmpbuffer != NULL);
-		bzero(tmpbuffer, state->blocksize);
+		memset(tmpbuffer, 0, state->blocksize);
 		tmp_leaf = (xfs_attr_leafblock_t *)tmpbuffer;
 		tmp_hdr = &tmp_leaf->hdr;
 		tmp_hdr->info = save_hdr->info; /* struct copy */
@@ -1729,7 +1729,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 				(int)INT_GET(drop_hdr->count, ARCH_CONVERT),
 				mp);
 		}
-		bcopy((char *)tmp_leaf, (char *)save_leaf, state->blocksize);
+		memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize);
 		kmem_free(tmpbuffer, state->blocksize);
 	}
 
@@ -1840,7 +1840,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 			name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, probe);
 			if (name_loc->namelen != args->namelen)
 				continue;
-			if (bcmp(args->name, (char *)name_loc->nameval,
+			if (memcmp(args->name, (char *)name_loc->nameval,
 					     args->namelen) != 0)
 				continue;
 			if (((args->flags & ATTR_ROOT) != 0) !=
@@ -1852,7 +1852,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 			name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, probe);
 			if (name_rmt->namelen != args->namelen)
 				continue;
-			if (bcmp(args->name, (char *)name_rmt->name,
+			if (memcmp(args->name, (char *)name_rmt->name,
 					     args->namelen) != 0)
 				continue;
 			if (((args->flags & ATTR_ROOT) != 0) !=
@@ -1895,7 +1895,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	if (entry->flags & XFS_ATTR_LOCAL) {
 		name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index);
 		ASSERT(name_loc->namelen == args->namelen);
-		ASSERT(bcmp(args->name, name_loc->nameval, args->namelen) == 0);
+		ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0);
 		valuelen = INT_GET(name_loc->valuelen, ARCH_CONVERT);
 		if (args->flags & ATTR_KERNOVAL) {
 			args->valuelen = valuelen;
@@ -1906,11 +1906,11 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
 			return(XFS_ERROR(ERANGE));
 		}
 		args->valuelen = valuelen;
-		bcopy(&name_loc->nameval[args->namelen], args->value, valuelen);
+		memcpy(args->value, &name_loc->nameval[args->namelen], valuelen);
 	} else {
 		name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index);
 		ASSERT(name_rmt->namelen == args->namelen);
-		ASSERT(bcmp(args->name, name_rmt->name, args->namelen) == 0);
+		ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
 		valuelen = INT_GET(name_rmt->valuelen, ARCH_CONVERT);
 		args->rmtblkno = INT_GET(name_rmt->valueblk, ARCH_CONVERT);
 		args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen);
@@ -1983,7 +1983,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 		tmp *= sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &leaf_d->entries[start_d];
 		entry_d = &leaf_d->entries[start_d + count];
-		ovbcopy((char *)entry_s, (char *)entry_d, tmp);
+		memmove((char *)entry_d, (char *)entry_s, tmp);
 	}
 
 	/*
@@ -2004,7 +2004,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 		 * off for 6.2, should be revisited later.
 		 */
 		if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */
-			bzero(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
+			memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp);
 			INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp);
 			INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
 			entry_d--;	/* to compensate for ++ in loop hdr */
@@ -2021,11 +2021,11 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 			entry_d->flags = entry_s->flags;
 			ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp
 							<= XFS_LBSIZE(mp));
-			ovbcopy(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i),
-			      XFS_ATTR_LEAF_NAME(leaf_d, desti), tmp);
+			memmove(XFS_ATTR_LEAF_NAME(leaf_d, desti),
+				XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
 			ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp
 							<= XFS_LBSIZE(mp));
-			bzero(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
+			memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp);
 			INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp);
 			INT_MOD(hdr_d->usedbytes, ARCH_CONVERT, tmp);
 			INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
@@ -2047,7 +2047,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 		entry_s = &leaf_s->entries[start_s];
 		ASSERT(((char *)entry_s + tmp) <=
 		       ((char *)leaf_s + XFS_LBSIZE(mp)));
-		bzero((char *)entry_s, tmp);
+		memset((char *)entry_s, 0, tmp);
 	} else {
 		/*
 		 * Move the remaining entries down to fill the hole,
@@ -2057,14 +2057,14 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 		tmp *= sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &leaf_s->entries[start_s + count];
 		entry_d = &leaf_s->entries[start_s];
-		ovbcopy((char *)entry_s, (char *)entry_d, tmp);
+		memmove((char *)entry_d, (char *)entry_s, tmp);
 
 		tmp = count * sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &leaf_s->entries[INT_GET(hdr_s->count,
 							ARCH_CONVERT)];
 		ASSERT(((char *)entry_s + tmp) <=
 		       ((char *)leaf_s + XFS_LBSIZE(mp)));
-		bzero((char *)entry_s, tmp);
+		memset((char *)entry_s, 0, tmp);
 	}
 
 	/*
@@ -2345,7 +2345,7 @@ xfs_attr_put_listent(xfs_attr_list_context_t *context,
 
 	aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
 	aep->a_valuelen = valuelen;
-	bcopy(name, aep->a_name, namelen);
+	memcpy(aep->a_name, name, namelen);
 	aep->a_name[ namelen ] = 0;
 	context->alist->al_offset[ context->count++ ] = context->firstu;
 	context->alist->al_count = context->count;
@@ -2404,7 +2404,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 	}
 	ASSERT(INT_GET(entry->hashval, ARCH_CONVERT) == args->hashval);
 	ASSERT(namelen == args->namelen);
-	ASSERT(bcmp(name, args->name, namelen) == 0);
+	ASSERT(memcmp(name, args->name, namelen) == 0);
 #endif /* DEBUG */
 
 	entry->flags &= ~XFS_ATTR_INCOMPLETE;
@@ -2559,7 +2559,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
 	}
 	ASSERT(INT_GET(entry1->hashval, ARCH_CONVERT) == INT_GET(entry2->hashval, ARCH_CONVERT));
 	ASSERT(namelen1 == namelen2);
-	ASSERT(bcmp(name1, name2, namelen1) == 0);
+	ASSERT(memcmp(name1, name2, namelen1) == 0);
 #endif /* DEBUG */
 
 	ASSERT(entry1->flags & XFS_ATTR_INCOMPLETE);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 6bf9632238e7..44e3aa012c17 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -489,7 +489,7 @@ xfs_bmap_add_attrfork_local(
 		return 0;
 	if ((ip->i_d.di_mode & IFMT) == IFDIR) {
 		mp = ip->i_mount;
-		bzero(&dargs, sizeof(dargs));
+		memset(&dargs, 0, sizeof(dargs));
 		dargs.dp = ip;
 		dargs.firstblock = firstblock;
 		dargs.flist = flist;
@@ -3146,7 +3146,7 @@ xfs_bmap_delete_exlist(
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 	base = ifp->if_u1.if_extents;
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - count;
-	ovbcopy(&base[idx + count], &base[idx],
+	memmove(&base[idx], &base[idx + count],
 		(nextents - idx) * sizeof(*base));
 	xfs_iext_realloc(ip, -count, whichfork);
 }
@@ -3310,7 +3310,7 @@ xfs_bmap_insert_exlist(
 	xfs_iext_realloc(ip, count, whichfork);
 	base = ifp->if_u1.if_extents;
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	ovbcopy(&base[idx], &base[idx + count],
+	memmove(&base[idx + count], &base[idx],
 		(nextents - (idx + count)) * sizeof(*base));
 	for (to = idx; to < idx + count; to++, new++)
 		xfs_bmbt_set_all(&base[to], new);
@@ -3380,7 +3380,7 @@ xfs_bmap_local_to_extents(
 		ASSERT(args.len == 1);
 		*firstblock = args.fsbno;
 		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
-		bcopy(ifp->if_u1.if_data, (char *)XFS_BUF_PTR(bp),
+		memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data,
 			ifp->if_bytes);
 		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
 		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -3556,7 +3556,7 @@ xfs_bmap_trace_addentry(
 	if (cnt == 1) {
 		ASSERT(r2 == NULL);
 		r2 = &tr2;
-		bzero(&tr2, sizeof(tr2));
+		memset(&tr2, 0, sizeof(tr2));
 	} else
 		ASSERT(r2 != NULL);
 	ktrace_enter(xfs_bmap_trace_buf,
@@ -4462,7 +4462,7 @@ xfs_bmap_read_extents(
 		 */
 		frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
 			block, 1, mp->m_bmap_dmxr[0]);
-		bcopy(frp, trp, num_recs * sizeof(*frp));
+		memcpy(trp, frp, num_recs * sizeof(*frp));
 		if (exntf == XFS_EXTFMT_NOSTATE) {
 			/*
 			 * Check all attribute bmap btree records and
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 5b384f4f9cba..4e41699fbb63 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -381,9 +381,9 @@ xfs_bmbt_delrec(
 		}
 #endif
 		if (ptr < numrecs) {
-			ovbcopy(&kp[ptr], &kp[ptr - 1],
+			memmove(&kp[ptr - 1], &kp[ptr],
 				(numrecs - ptr) * sizeof(*kp));
-			ovbcopy(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */
+			memmove(&pp[ptr - 1], &pp[ptr], /* INT_: direct copy */
 				(numrecs - ptr) * sizeof(*pp));
 			xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs - 1);
 			xfs_bmbt_log_keys(cur, bp, ptr, numrecs - 1);
@@ -391,7 +391,7 @@ xfs_bmbt_delrec(
 	} else {
 		rp = XFS_BMAP_REC_IADDR(block, 1, cur);
 		if (ptr < numrecs) {
-			ovbcopy(&rp[ptr], &rp[ptr - 1],
+			memmove(&rp[ptr - 1], &rp[ptr],
 				(numrecs - ptr) * sizeof(*rp));
 			xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1);
 		}
@@ -619,14 +619,14 @@ xfs_bmbt_delrec(
 			}
 		}
 #endif
-		bcopy(rkp, lkp, numrrecs * sizeof(*lkp));
-		bcopy(rpp, lpp, numrrecs * sizeof(*lpp));
+		memcpy(lkp, rkp, numrrecs * sizeof(*lkp));
+		memcpy(lpp, rpp, numrrecs * sizeof(*lpp));
 		xfs_bmbt_log_keys(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
 		xfs_bmbt_log_ptrs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
 	} else {
 		lrp = XFS_BMAP_REC_IADDR(left, numlrecs + 1, cur);
 		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
-		bcopy(rrp, lrp, numrrecs * sizeof(*lrp));
+		memcpy(lrp, rrp, numrrecs * sizeof(*lrp));
 		xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
 	}
 	INT_MOD(left->bb_numrecs, ARCH_CONVERT, numrrecs);
@@ -861,9 +861,9 @@ xfs_bmbt_insrec(
 			}
 		}
 #endif
-		ovbcopy(&kp[ptr - 1], &kp[ptr],
+		memmove(&kp[ptr], &kp[ptr - 1],
 			(numrecs - ptr + 1) * sizeof(*kp));
-		ovbcopy(&pp[ptr - 1], &pp[ptr], /* INT_: direct copy */
+		memmove(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */
 			(numrecs - ptr + 1) * sizeof(*pp));
 #ifdef DEBUG
 		if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)*bnop,
@@ -880,7 +880,7 @@ xfs_bmbt_insrec(
 		xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs);
 	} else {
 		rp = XFS_BMAP_REC_IADDR(block, 1, cur);
-		ovbcopy(&rp[ptr - 1], &rp[ptr],
+		memmove(&rp[ptr], &rp[ptr - 1],
 			(numrecs - ptr + 1) * sizeof(*rp));
 		rp[ptr - 1] = *recp;
 		numrecs++;
@@ -980,7 +980,7 @@ xfs_bmbt_killroot(
 	ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) == INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
 	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
 	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
-	bcopy(ckp, kp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+	memcpy(kp, ckp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
 	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
 	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
 #ifdef DEBUG
@@ -991,7 +991,7 @@ xfs_bmbt_killroot(
 		}
 	}
 #endif
-	bcopy(cpp, pp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+	memcpy(pp, cpp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
 	xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
 		cur->bc_private.b.flist, cur->bc_mp);
 	if (!async)
@@ -1349,12 +1349,12 @@ xfs_bmbt_lshift(
 			}
 		}
 #endif
-		ovbcopy(rkp + 1, rkp, rrecs * sizeof(*rkp));
-		ovbcopy(rpp + 1, rpp, rrecs * sizeof(*rpp));
+		memmove(rkp, rkp + 1, rrecs * sizeof(*rkp));
+		memmove(rpp, rpp + 1, rrecs * sizeof(*rpp));
 		xfs_bmbt_log_keys(cur, rbp, 1, rrecs);
 		xfs_bmbt_log_ptrs(cur, rbp, 1, rrecs);
 	} else {
-		ovbcopy(rrp + 1, rrp, rrecs * sizeof(*rrp));
+		memmove(rrp, rrp + 1, rrecs * sizeof(*rrp));
 		xfs_bmbt_log_recs(cur, rbp, 1, rrecs);
 		INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
 		rkp = &key;
@@ -1452,8 +1452,8 @@ xfs_bmbt_rshift(
 			}
 		}
 #endif
-		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 #ifdef DEBUG
 		if ((error = xfs_btree_check_lptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))) {
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -1467,7 +1467,7 @@ xfs_bmbt_rshift(
 	} else {
 		lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
 		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
-		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		*rrp = *lrp;
 		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
 		INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
@@ -1629,15 +1629,15 @@ xfs_bmbt_split(
 			}
 		}
 #endif
-		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 		xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		keyp->br_startoff = INT_GET(rkp->br_startoff, ARCH_CONVERT);
 	} else {
 		lrp = XFS_BMAP_REC_IADDR(left, i, cur);
 		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
-		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		keyp->br_startoff = xfs_bmbt_get_startoff(rrp);
 	}
@@ -1748,8 +1748,8 @@ xfs_bmdr_to_bmbt(
 	fpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
 	tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
 	dmxr = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);
-	bcopy(fkp, tkp, sizeof(*fkp) * dmxr);
-	bcopy(fpp, tpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
+	memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
+	memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
 }
 
 /*
@@ -2333,7 +2333,7 @@ xfs_bmbt_newroot(
 	cur->bc_ptrs[level + 1] = 1;
 	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
 	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
-	bcopy(kp, ckp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+	memcpy(ckp, kp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
 	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
 #ifdef DEBUG
 	for (i = 0; i < INT_GET(cblock->bb_numrecs, ARCH_CONVERT); i++) {
@@ -2343,7 +2343,7 @@ xfs_bmbt_newroot(
 		}
 	}
 #endif
-	bcopy(pp, cpp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+	memcpy(cpp, pp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
 #ifdef DEBUG
 	if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)args.fsbno,
 			level))) {
@@ -2559,8 +2559,8 @@ xfs_bmbt_to_bmdr(
 	fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
 	tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
 	dmxr = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);
-	bcopy(fkp, tkp, sizeof(*fkp) * dmxr);
-	bcopy(fpp, tpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
+	memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
+	memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
 }
 
 /*
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index ac62646cacde..8a837fab5ad0 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -766,7 +766,7 @@ xfs_buf_item_init(
 	 * to have logged.
 	 */
 	bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP);
-	bcopy(XFS_BUF_PTR(bp), bip->bli_orig, XFS_BUF_COUNT(bp));
+	memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp));
 	bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP);
 #endif
 
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 020801f897ae..06d8371730bc 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -360,7 +360,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
 			     (char *)leaf);
 	}
-	bcopy(oldroot, node, size);
+	memcpy(node, oldroot, size);
 	xfs_da_log_buf(tp, bp, 0, size - 1);
 	xfs_da_buf_done(blk1->bp);
 	blk1->bp = bp;
@@ -527,7 +527,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 			tmp *= (uint)sizeof(xfs_da_node_entry_t);
 			btree_s = &node2->btree[0];
 			btree_d = &node2->btree[count];
-			ovbcopy(btree_s, btree_d, tmp);
+			memmove(btree_d, btree_s, tmp);
 		}
 
 		/*
@@ -538,7 +538,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
 		btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
 		btree_d = &node2->btree[0];
-		bcopy(btree_s, btree_d, tmp);
+		memcpy(btree_d, btree_s, tmp);
 		INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));
 
 	} else {
@@ -550,7 +550,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
 		btree_s = &node2->btree[0];
 		btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
-		bcopy(btree_s, btree_d, tmp);
+		memcpy(btree_d, btree_s, tmp);
 		INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
 		xfs_da_log_buf(tp, blk1->bp,
 			XFS_DA_LOGRANGE(node1, btree_d, tmp));
@@ -562,7 +562,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
 		btree_s = &node2->btree[count];
 		btree_d = &node2->btree[0];
-		ovbcopy(btree_s, btree_d, tmp);
+		memmove(btree_d, btree_s, tmp);
 		INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
 	}
 
@@ -622,7 +622,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	btree = &node->btree[ oldblk->index ];
 	if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) {
 		tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
-		ovbcopy(btree, btree + 1, tmp);
+		memmove(btree + 1, btree, tmp);
 	}
 	INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval);
 	INT_SET(btree->before, ARCH_CONVERT, newblk->blkno);
@@ -790,7 +790,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	}
 	ASSERT(INT_ISZERO(blkinfo->forw, ARCH_CONVERT));
 	ASSERT(INT_ISZERO(blkinfo->back, ARCH_CONVERT));
-	bcopy(bp->data, root_blk->bp->data, state->blocksize);
+	memcpy(root_blk->bp->data, bp->data, state->blocksize);
 	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
 	error = xfs_da_shrink_inode(args, child, bp);
 	return(error);
@@ -842,7 +842,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 		 * path point to the block we want to drop (this one).
 		 */
 		forward = (!INT_ISZERO(info->forw, ARCH_CONVERT));
-		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
 		if (error)
@@ -898,7 +898,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 	 * Make altpath point to the block we want to keep (the lower
 	 * numbered block) and path point to the block we want to drop.
 	 */
-	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	memcpy(&state->altpath, &state->path, sizeof(state->path));
 	if (blkno < blk->blkno) {
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
@@ -1001,12 +1001,12 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
 	if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
 		tmp  = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1;
 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
-		ovbcopy(btree + 1, btree, tmp);
+		memmove(btree, btree + 1, tmp);
 		xfs_da_log_buf(state->args->trans, drop_blk->bp,
 		    XFS_DA_LOGRANGE(node, btree, tmp));
 		btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ];
 	}
-	bzero((char *)btree, sizeof(xfs_da_node_entry_t));
+	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
 	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
 	INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
@@ -1049,7 +1049,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	{
 		btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
 		tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
-		ovbcopy(&save_node->btree[0], btree, tmp);
+		memmove(btree, &save_node->btree[0], tmp);
 		btree = &save_node->btree[0];
 		xfs_da_log_buf(tp, save_blk->bp,
 			XFS_DA_LOGRANGE(save_node, btree,
@@ -1067,7 +1067,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	 * Move all the B-tree elements from drop_blk to save_blk.
 	 */
 	tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
-	bcopy(&drop_node->btree[0], btree, tmp);
+	memcpy(btree, &drop_node->btree[0], tmp);
 	INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));
 
 	xfs_da_log_buf(tp, save_blk->bp,
@@ -1798,7 +1798,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	/*
 	 * Copy the last block into the dead buffer and log it.
 	 */
-	bcopy(last_buf->data, dead_buf->data, mp->m_dirblksize);
+	memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
 	xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
 	dead_info = dead_buf->data;
 	/*
@@ -2343,7 +2343,7 @@ xfs_da_state_free(xfs_da_state_t *state)
 	if (state->extravalid && state->extrablk.bp)
 		xfs_da_buf_done(state->extrablk.bp);
 #ifdef DEBUG
-	bzero((char *)state, sizeof(*state));
+	memset((char *)state, 0, sizeof(*state));
 #endif /* DEBUG */
 	kmem_zone_free(xfs_da_state_zone, state);
 }
@@ -2390,7 +2390,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
 		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
 		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
 			bp = bps[i];
-			bcopy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
+			memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
 				XFS_BUF_COUNT(bp));
 		}
 	}
@@ -2431,7 +2431,7 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf)
 		for (i = off = 0; i < dabuf->nbuf;
 				i++, off += XFS_BUF_COUNT(bp)) {
 			bp = dabuf->bps[i];
-			bcopy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
+			memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
 				XFS_BUF_COUNT(bp));
 		}
 	}
@@ -2462,7 +2462,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
 			dabuf->next->prev = dabuf->prev;
 		mutex_spinunlock(&xfs_dabuf_global_lock, s);
 	}
-	bzero(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
+	memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
 #endif
 	if (dabuf->nbuf == 1)
 		kmem_zone_free(xfs_dabuf_zone, dabuf);
@@ -2532,7 +2532,7 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 		bp = dabuf->bps[0];
 	} else {
 		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
-		bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist));
+		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
 	}
 	xfs_da_buf_done(dabuf);
 	for (i = 0; i < nbuf; i++)
@@ -2558,7 +2558,7 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 		bp = dabuf->bps[0];
 	} else {
 		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
-		bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist));
+		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
 	}
 	xfs_da_buf_done(dabuf);
 	for (i = 0; i < nbuf; i++)
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index ea2f5798cf70..24248a19942d 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -210,7 +210,7 @@ xfs_dir_init(xfs_trans_t *trans, xfs_inode_t *dir, xfs_inode_t *parent_dir)
 	xfs_da_args_t args;
 	int error;
 
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.dp = dir;
 	args.trans = trans;
 
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 1ee94f626cc1..403117dd28ff 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -147,7 +147,7 @@ xfs_dir2_init(
 	xfs_da_args_t	args;		/* operation arguments */
 	int		error;		/* error return value */
 
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.dp = dp;
 	args.trans = tp;
 	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
@@ -711,7 +711,7 @@ xfs_dir2_put_dirent64_direct(
 	idbp->d_off = pa->cook;
 	idbp->d_name[namelen] = '\0';
 	pa->done = 1;
-	bcopy(pa->name, idbp->d_name, namelen);
+	memcpy(idbp->d_name, pa->name, namelen);
 	return 0;
 }
 
@@ -743,7 +743,7 @@ xfs_dir2_put_dirent64_uio(
 	idbp->d_ino = pa->ino;
 	idbp->d_off = pa->cook;
 	idbp->d_name[namelen] = '\0';
-	bcopy(pa->name, idbp->d_name, namelen);
+	memcpy(idbp->d_name, pa->name, namelen);
 	rval = uiomove((caddr_t)idbp, reclen, UIO_READ, uio);
 	pa->done = (rval == 0);
 	return rval;
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index de56814d5d25..3756923e8740 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -331,7 +331,7 @@ xfs_dir2_block_addname(
 		blp--;
 		mid++;
 		if (mid)
-			ovbcopy(&blp[1], blp, mid * sizeof(*blp));
+			memmove(blp, &blp[1], mid * sizeof(*blp));
 		lfloglow = 0;
 		lfloghigh = mid;
 	}
@@ -357,7 +357,7 @@ xfs_dir2_block_addname(
 		    (highstale == INT_GET(btp->count, ARCH_CONVERT) ||
 		     mid - lowstale <= highstale - mid)) {
 			if (mid - lowstale)
-				ovbcopy(&blp[lowstale + 1], &blp[lowstale],
+				memmove(&blp[lowstale], &blp[lowstale + 1],
 					(mid - lowstale) * sizeof(*blp));
 			lfloglow = MIN(lowstale, lfloglow);
 			lfloghigh = MAX(mid, lfloghigh);
@@ -369,7 +369,7 @@ xfs_dir2_block_addname(
 			ASSERT(highstale < INT_GET(btp->count, ARCH_CONVERT));
 			mid++;
 			if (highstale - mid)
-				ovbcopy(&blp[mid], &blp[mid + 1],
+				memmove(&blp[mid + 1], &blp[mid],
 					(highstale - mid) * sizeof(*blp));
 			lfloglow = MIN(mid, lfloglow);
 			lfloghigh = MAX(highstale, lfloghigh);
@@ -397,7 +397,7 @@ xfs_dir2_block_addname(
 	 */
 	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
 	dep->namelen = args->namelen;
-	bcopy(args->name, dep->name, args->namelen);
+	memcpy(dep->name, args->name, args->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
 	/*
@@ -717,7 +717,7 @@ xfs_dir2_block_lookup_int(
 		 */
 		if (dep->namelen == args->namelen &&
 		    dep->name[0] == args->name[0] &&
-		    bcmp(dep->name, args->name, args->namelen) == 0) {
+		    memcmp(dep->name, args->name, args->namelen) == 0) {
 			*bpp = bp;
 			*entno = mid;
 			return 0;
@@ -1075,7 +1075,7 @@ xfs_dir2_sf_to_block(
 	buf_len = dp->i_df.if_bytes;
 	buf = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);
 
-	bcopy(sfp, buf, dp->i_df.if_bytes);
+	memcpy(buf, sfp, dp->i_df.if_bytes);
 	xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
 	dp->i_d.di_size = 0;
 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
@@ -1199,7 +1199,7 @@ xfs_dir2_sf_to_block(
 		INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
 				XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT));
 		dep->namelen = sfep->namelen;
-		bcopy(sfep->name, dep->name, dep->namelen);
+		memcpy(dep->name, sfep->name, dep->namelen);
 		tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 		INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
 		xfs_dir2_data_log_entry(tp, bp, dep);
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 7ea956729cf1..7481245193d4 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -340,7 +340,7 @@ xfs_dir2_data_freescan(
 	/*
 	 * Start by clearing the table.
 	 */
-	bzero(d->hdr.bestfree, sizeof(d->hdr.bestfree));
+	memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree));
 	*loghead = 1;
 	/*
 	 * Set up pointers.
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index c201111f7339..7ab474f6a3a3 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -110,7 +110,7 @@ xfs_dir2_block_to_leaf(
 	 * Could compact these but I think we always do the conversion
 	 * after squeezing out stale entries.
 	 */
-	bcopy(blp, leaf->ents, INT_GET(btp->count, ARCH_CONVERT) * sizeof(xfs_dir2_leaf_entry_t));
+	memcpy(leaf->ents, blp, INT_GET(btp->count, ARCH_CONVERT) * sizeof(xfs_dir2_leaf_entry_t));
 	xfs_dir2_leaf_log_ents(tp, lbp, 0, INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1);
 	needscan = 0;
 	needlog = 1;
@@ -353,7 +353,7 @@ xfs_dir2_leaf_addname(
 		 */
 		if (use_block >= INT_GET(ltp->bestcount, ARCH_CONVERT)) {
 			bestsp--;
-			ovbcopy(&bestsp[1], &bestsp[0],
+			memmove(&bestsp[0], &bestsp[1],
 				INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(bestsp[0]));
 			INT_MOD(ltp->bestcount, ARCH_CONVERT, +1);
 			xfs_dir2_leaf_log_tail(tp, lbp);
@@ -402,7 +402,7 @@ xfs_dir2_leaf_addname(
 	dep = (xfs_dir2_data_entry_t *)dup;
 	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
 	dep->namelen = args->namelen;
-	bcopy(args->name, dep->name, dep->namelen);
+	memcpy(dep->name, args->name, dep->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
 	/*
@@ -434,7 +434,7 @@ xfs_dir2_leaf_addname(
 		 * lep is still good as the index leaf entry.
 		 */
 		if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
-			ovbcopy(lep, lep + 1,
+			memmove(lep + 1, lep,
 				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
 		/*
 		 * Record low and high logging indices for the leaf.
@@ -493,8 +493,8 @@ xfs_dir2_leaf_addname(
 			 * and make room for the new entry.
 			 */
 			if (index - lowstale - 1 > 0)
-				ovbcopy(&leaf->ents[lowstale + 1],
-					&leaf->ents[lowstale],
+				memmove(&leaf->ents[lowstale],
+					&leaf->ents[lowstale + 1],
 					(index - lowstale - 1) * sizeof(*lep));
 			lep = &leaf->ents[index - 1];
 			lfloglow = MIN(lowstale, lfloglow);
@@ -512,8 +512,8 @@ xfs_dir2_leaf_addname(
 			 * and make room for the new entry.
 			 */
 			if (highstale - index > 0)
-				ovbcopy(&leaf->ents[index],
-					&leaf->ents[index + 1],
+				memmove(&leaf->ents[index + 1],
+					&leaf->ents[index],
 					(highstale - index) * sizeof(*lep));
 			lep = &leaf->ents[index];
 			lfloglow = MIN(index, lfloglow);
@@ -847,7 +847,7 @@ xfs_dir2_leaf_getdents(
 					 * the table.
 					 */
 					if (!map->br_blockcount && --map_valid)
-						ovbcopy(&map[1], &map[0],
+						memmove(&map[0], &map[1],
 							sizeof(map[0]) *
 							map_valid);
 					i -= j;
@@ -909,8 +909,8 @@ xfs_dir2_leaf_getdents(
 						nmap--;
 						length = map_valid + nmap - i;
 						if (length)
-							ovbcopy(&map[i + 1],
-								&map[i],
+							memmove(&map[i],
+								&map[i + 1],
 								sizeof(map[i]) *
 								length);
 					} else {
@@ -1409,7 +1409,7 @@ xfs_dir2_leaf_lookup_int(
 		 */
 		if (dep->namelen == args->namelen &&
 		    dep->name[0] == args->name[0] &&
-		    bcmp(dep->name, args->name, args->namelen) == 0) {
+		    memcmp(dep->name, args->name, args->namelen) == 0) {
 			*dbpp = dbp;
 			*indexp = index;
 			return 0;
@@ -1544,7 +1544,7 @@ xfs_dir2_leaf_removename(
 			 * Copy the table down so inactive entries at the
 			 * end are removed.
 			 */
-			ovbcopy(bestsp, &bestsp[db - i],
+			memmove(&bestsp[db - i], bestsp,
 				(INT_GET(ltp->bestcount, ARCH_CONVERT) - (db - i)) * sizeof(*bestsp));
 			INT_MOD(ltp->bestcount, ARCH_CONVERT, -(db - i));
 			xfs_dir2_leaf_log_tail(tp, lbp);
@@ -1728,7 +1728,7 @@ xfs_dir2_leaf_trim_data(
 	 */
 	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
 	INT_MOD(ltp->bestcount, ARCH_CONVERT, -1);
-	ovbcopy(&bestsp[0], &bestsp[1], INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(*bestsp));
+	memmove(&bestsp[1], &bestsp[0], INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(*bestsp));
 	xfs_dir2_leaf_log_tail(tp, lbp);
 	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
 	return 0;
@@ -1842,7 +1842,7 @@ xfs_dir2_node_to_leaf(
 	/*
 	 * Set up the leaf bests table.
 	 */
-	bcopy(free->bests, XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT),
+	memcpy(XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT), free->bests,
 		INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(leaf->bests[0]));
 	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
 	xfs_dir2_leaf_log_tail(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 99661539e595..54234b40ed6b 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -239,7 +239,7 @@ xfs_dir2_leafn_add(
 	if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) {
 		lep = &leaf->ents[index];
 		if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
-			ovbcopy(lep, lep + 1,
+			memmove(lep + 1, lep,
 				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
 		lfloglow = index;
 		lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT);
@@ -288,8 +288,8 @@ xfs_dir2_leafn_add(
 			       XFS_DIR2_NULL_DATAPTR);
 			ASSERT(index - lowstale - 1 >= 0);
 			if (index - lowstale - 1 > 0)
-				ovbcopy(&leaf->ents[lowstale + 1],
-					&leaf->ents[lowstale],
+				memmove(&leaf->ents[lowstale],
+					&leaf->ents[lowstale + 1],
 					(index - lowstale - 1) * sizeof(*lep));
 			lep = &leaf->ents[index - 1];
 			lfloglow = MIN(lowstale, lfloglow);
@@ -304,8 +304,8 @@ xfs_dir2_leafn_add(
 			       XFS_DIR2_NULL_DATAPTR);
 			ASSERT(highstale - index >= 0);
 			if (highstale - index > 0)
-				ovbcopy(&leaf->ents[index],
-					&leaf->ents[index + 1],
+				memmove(&leaf->ents[index + 1],
+					&leaf->ents[index],
 					(highstale - index) * sizeof(*lep));
 			lep = &leaf->ents[index];
 			lfloglow = MIN(index, lfloglow);
@@ -564,7 +564,7 @@ xfs_dir2_leafn_lookup_int(
 			 */
 			if (dep->namelen == args->namelen &&
 			    dep->name[0] == args->name[0] &&
-			    bcmp(dep->name, args->name, args->namelen) == 0) {
+			    memcmp(dep->name, args->name, args->namelen) == 0) {
 				args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
 				*indexp = index;
 				state->extravalid = 1;
@@ -644,7 +644,7 @@ xfs_dir2_leafn_moveents(
 	 * to hold the new entries.
 	 */
 	if (start_d < INT_GET(leaf_d->hdr.count, ARCH_CONVERT)) {
-		ovbcopy(&leaf_d->ents[start_d], &leaf_d->ents[start_d + count],
+		memmove(&leaf_d->ents[start_d + count], &leaf_d->ents[start_d],
 			(INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - start_d) *
 			sizeof(xfs_dir2_leaf_entry_t));
 		xfs_dir2_leaf_log_ents(tp, bp_d, start_d + count,
@@ -666,7 +666,7 @@ xfs_dir2_leafn_moveents(
 	/*
 	 * Copy the leaf entries from source to destination.
 	 */
-	bcopy(&leaf_s->ents[start_s], &leaf_d->ents[start_d],
+	memcpy(&leaf_d->ents[start_d], &leaf_s->ents[start_s],
 		count * sizeof(xfs_dir2_leaf_entry_t));
 	xfs_dir2_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1);
 	/*
@@ -674,7 +674,7 @@ xfs_dir2_leafn_moveents(
 	 * delete the ones we copied by sliding the next ones down.
 	 */
 	if (start_s + count < INT_GET(leaf_s->hdr.count, ARCH_CONVERT)) {
-		ovbcopy(&leaf_s->ents[start_s + count], &leaf_s->ents[start_s],
+		memmove(&leaf_s->ents[start_s], &leaf_s->ents[start_s + count],
 			count * sizeof(xfs_dir2_leaf_entry_t));
 		xfs_dir2_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1);
 	}
@@ -1135,7 +1135,7 @@ xfs_dir2_leafn_toosmall(
 		 * path point to the block we want to drop (this one).
 		 */
 		forward = !INT_ISZERO(info->forw, ARCH_CONVERT);
-		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward, 0,
 			&rval);
 		if (error)
@@ -1197,7 +1197,7 @@ xfs_dir2_leafn_toosmall(
 	 * Make altpath point to the block we want to keep (the lower
 	 * numbered block) and path point to the block we want to drop.
 	 */
-	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	memcpy(&state->altpath, &state->path, sizeof(state->path));
 	if (blkno < blk->blkno)
 		error = xfs_da_path_shift(state, &state->altpath, forward, 0,
 			&rval);
@@ -1685,7 +1685,7 @@ xfs_dir2_node_addname_int(
 	dep = (xfs_dir2_data_entry_t *)dup;
 	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
 	dep->namelen = args->namelen;
-	bcopy(args->name, dep->name, dep->namelen);
+	memcpy(dep->name, args->name, dep->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
 	xfs_dir2_data_log_entry(tp, dbp, dep);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index c1d2d3d9b2c2..aaba9972bd57 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -176,7 +176,7 @@ xfs_dir2_block_to_sf(
 	 * and add local data.
 	 */
 	block = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
-	bcopy(bp->data, block, mp->m_dirblksize);
+	memcpy(block, bp->data, mp->m_dirblksize);
 	logflags = XFS_ILOG_CORE;
 	if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
 		ASSERT(error != ENOSPC);
@@ -198,7 +198,7 @@ xfs_dir2_block_to_sf(
 	 * Copy the header into the newly allocate local space.
 	 */
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	bcopy(sfhp, sfp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count));
+	memcpy(sfp, sfhp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count));
 	dp->i_d.di_size = size;
 	/*
 	 * Set up to loop over the block's entries.
@@ -241,7 +241,7 @@ xfs_dir2_block_to_sf(
 			XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep,
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)block), ARCH_CONVERT);
-			bcopy(dep->name, sfep->name, dep->namelen);
+			memcpy(sfep->name, dep->name, dep->namelen);
 			temp=INT_GET(dep->inumber, ARCH_CONVERT);
 			XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &temp,
 				XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
@@ -405,7 +405,7 @@ xfs_dir2_sf_addname_easy(
 	 */
 	sfep->namelen = args->namelen;
 	XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT);
-	bcopy(args->name, sfep->name, sfep->namelen);
+	memcpy(sfep->name, args->name, sfep->namelen);
 	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber,
 		XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
 	/*
@@ -457,7 +457,7 @@ xfs_dir2_sf_addname_hard(
 	old_isize = (int)dp->i_d.di_size;
 	buf = kmem_alloc(old_isize, KM_SLEEP);
 	oldsfp = (xfs_dir2_sf_t *)buf;
-	bcopy(sfp, oldsfp, old_isize);
+	memcpy(oldsfp, sfp, old_isize);
 	/*
 	 * Loop over the old directory finding the place we're going
 	 * to insert the new entry.
@@ -490,14 +490,14 @@ xfs_dir2_sf_addname_hard(
 	 * Copy the first part of the directory, including the header.
 	 */
 	nbytes = (int)((char *)oldsfep - (char *)oldsfp);
-	bcopy(oldsfp, sfp, nbytes);
+	memcpy(sfp, oldsfp, nbytes);
 	sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes);
 	/*
 	 * Fill in the new entry, and update the header counts.
 	 */
 	sfep->namelen = args->namelen;
 	XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT);
-	bcopy(args->name, sfep->name, sfep->namelen);
+	memcpy(sfep->name, args->name, sfep->namelen);
 	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber,
 		XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
 	sfp->hdr.count++;
@@ -510,7 +510,7 @@ xfs_dir2_sf_addname_hard(
 	 */
 	if (!eof) {
 		sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
-		bcopy(oldsfep, sfep, old_isize - nbytes);
+		memcpy(sfep, oldsfep, old_isize - nbytes);
 	}
 	kmem_free(buf, old_isize);
 	dp->i_d.di_size = new_isize;
@@ -916,7 +916,7 @@ xfs_dir2_sf_lookup(
 	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
 		if (sfep->namelen == args->namelen &&
 		    sfep->name[0] == args->name[0] &&
-		    bcmp(args->name, sfep->name, args->namelen) == 0) {
+		    memcmp(args->name, sfep->name, args->namelen) == 0) {
 			args->inumber =
 				XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
 					XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
@@ -971,7 +971,7 @@ xfs_dir2_sf_removename(
 	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
 		if (sfep->namelen == args->namelen &&
 		    sfep->name[0] == args->name[0] &&
-		    bcmp(sfep->name, args->name, args->namelen) == 0) {
+		    memcmp(sfep->name, args->name, args->namelen) == 0) {
 			ASSERT(XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
 					XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT) ==
 				args->inumber);
@@ -994,7 +994,7 @@ xfs_dir2_sf_removename(
 	 * Copy the part if any after the removed entry, sliding it down.
 	 */
 	if (byteoff + entsize < oldsize)
-		ovbcopy((char *)sfp + byteoff + entsize, (char *)sfp + byteoff,
+		memmove((char *)sfp + byteoff, (char *)sfp + byteoff + entsize,
 			oldsize - (byteoff + entsize));
 	/*
 	 * Fix up the header and file size.
@@ -1108,7 +1108,7 @@ xfs_dir2_sf_replace(
 		     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
 			if (sfep->namelen == args->namelen &&
 			    sfep->name[0] == args->name[0] &&
-			    bcmp(args->name, sfep->name, args->namelen) == 0) {
+			    memcmp(args->name, sfep->name, args->namelen) == 0) {
 #if XFS_BIG_FILESYSTEMS || defined(DEBUG)
 				ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
 					XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
@@ -1196,7 +1196,7 @@ xfs_dir2_sf_toino4(
 	buf = kmem_alloc(oldsize, KM_SLEEP);
 	oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	ASSERT(oldsfp->hdr.i8count == 1);
-	bcopy(oldsfp, buf, oldsize);
+	memcpy(buf, oldsfp, oldsize);
 	/*
 	 * Compute the new inode size.
 	 */
@@ -1228,7 +1228,7 @@ xfs_dir2_sf_toino4(
 		  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
-		bcopy(oldsfep->name, sfep->name, sfep->namelen);
+		memcpy(sfep->name, oldsfep->name, sfep->namelen);
 		ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp,
 			XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT);
 		XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
@@ -1273,7 +1273,7 @@ xfs_dir2_sf_toino8(
 	buf = kmem_alloc(oldsize, KM_SLEEP);
 	oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	ASSERT(oldsfp->hdr.i8count == 0);
-	bcopy(oldsfp, buf, oldsize);
+	memcpy(buf, oldsfp, oldsize);
 	/*
 	 * Compute the new inode size.
 	 */
@@ -1305,7 +1305,7 @@ xfs_dir2_sf_toino8(
 		  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
-		bcopy(oldsfep->name, sfep->name, sfep->namelen);
+		memcpy(sfep->name, oldsfep->name, sfep->namelen);
 		ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp,
 			XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT);
 		XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 576c19f17c68..7ee59c6107b3 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -93,7 +93,7 @@ typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t;
  * be calculated on the fly.
  *
  * Entries are packed toward the top as tightly as possible.  The header
- * and the elements must be bcopy()'d out into a work area to get correct
+ * and the elements must be memcpy'd out into a work area to get correct
  * alignment for the inode number fields.
  */
 typedef struct xfs_dir2_sf_hdr {
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index fe9280e1f427..3e517abee119 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -65,9 +65,9 @@ xfs_dir2_trace_enter(
 	ASSERT(xfs_dir2_trace_buf);
 	ASSERT(dp->i_dir_trace);
 	if (name)
-		bcopy(name, n, min(sizeof(n), namelen));
+		memcpy(n, name, min(sizeof(n), namelen));
 	else
-		bzero((char *)n, sizeof(n));
+		memset((char *)n, 0, sizeof(n));
 	ktrace_enter(xfs_dir2_trace_buf,
 		(void *)(__psunsigned_t)type, (void *)where,
 		(void *)a0, (void *)a1, (void *)a2, (void *)a3,
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index 1aceaf37693d..8c06f2fa1251 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -171,7 +171,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args)
 	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
 		if (sfe->namelen == args->namelen &&
 		    args->name[0] == sfe->name[0] &&
-		    bcmp(args->name, sfe->name, args->namelen) == 0)
+		    memcmp(args->name, sfe->name, args->namelen) == 0)
 			return(XFS_ERROR(EEXIST));
 		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
 	}
@@ -184,7 +184,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args)
 
 	XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sfe->inumber, ARCH_CONVERT);
 	sfe->namelen = args->namelen;
-	bcopy(args->name, sfe->name, sfe->namelen);
+	memcpy(sfe->name, args->name, sfe->namelen);
 	INT_MOD(sf->hdr.count, ARCH_CONVERT, +1);
 
 	dp->i_d.di_size += size;
@@ -223,7 +223,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args)
 		size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe);
 		if (sfe->namelen == args->namelen &&
 		    sfe->name[0] == args->name[0] &&
-		    bcmp(sfe->name, args->name, args->namelen) == 0)
+		    memcmp(sfe->name, args->name, args->namelen) == 0)
 			break;
 		base += size;
 		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
@@ -234,7 +234,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args)
 	}
 
 	if ((base + size) != dp->i_d.di_size) {
-		ovbcopy(&((char *)sf)[base+size], &((char *)sf)[base],
+		memmove(&((char *)sf)[base], &((char *)sf)[base+size],
 					      dp->i_d.di_size - (base+size));
 	}
 	INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
@@ -283,7 +283,7 @@ xfs_dir_shortform_lookup(xfs_da_args_t *args)
 	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
 		if (sfe->namelen == args->namelen &&
 		    sfe->name[0] == args->name[0] &&
-		    bcmp(args->name, sfe->name, args->namelen) == 0) {
+		    memcmp(args->name, sfe->name, args->namelen) == 0) {
 			XFS_DIR_SF_GET_DIRINO_ARCH(&sfe->inumber, &args->inumber, ARCH_CONVERT);
 			return(XFS_ERROR(EEXIST));
 		}
@@ -324,7 +324,7 @@ xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
 	tmpbuffer = kmem_alloc(size, KM_SLEEP);
 	ASSERT(tmpbuffer != NULL);
 
-	bcopy(dp->i_df.if_u1.if_data, tmpbuffer, size);
+	memcpy(tmpbuffer, dp->i_df.if_u1.if_data, size);
 
 	sf = (xfs_dir_shortform_t *)tmpbuffer;
 	XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, &inumber, ARCH_CONVERT);
@@ -611,8 +611,8 @@ xfs_dir_shortform_replace(xfs_da_args_t *args)
 	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
 		if (sfe->namelen == args->namelen &&
 		    sfe->name[0] == args->name[0] &&
-		    bcmp(args->name, sfe->name, args->namelen) == 0) {
-			ASSERT(bcmp((char *)&args->inumber,
+		    memcmp(args->name, sfe->name, args->namelen) == 0) {
+			ASSERT(memcmp((char *)&args->inumber,
 				(char *)&sfe->inumber, sizeof(xfs_ino_t)));
 			XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sfe->inumber, ARCH_CONVERT);
 			xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
@@ -650,10 +650,10 @@ xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
 	if (retval)
 		return(retval);
 	ASSERT(bp != NULL);
-	bcopy(bp->data, tmpbuffer, XFS_LBSIZE(dp->i_mount));
+	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
 	leaf = (xfs_dir_leafblock_t *)tmpbuffer;
 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
-	bzero(bp->data, XFS_LBSIZE(dp->i_mount));
+	memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
 
 	/*
 	 * Find and special case the parent inode number
@@ -736,7 +736,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
 		return(retval);
 	}
 	ASSERT(bp2 != NULL);
-	bcopy(bp1->data, bp2->data, XFS_LBSIZE(dp->i_mount));
+	memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
 	xfs_da_buf_done(bp1);
 	xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
 
@@ -787,7 +787,7 @@ xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 		return(retval);
 	ASSERT(bp != NULL);
 	leaf = bp->data;
-	bzero((char *)leaf, XFS_LBSIZE(dp->i_mount));
+	memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
 	hdr = &leaf->hdr;
 	INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_DIR_LEAF_MAGIC);
 	INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
@@ -960,7 +960,7 @@ xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
 	if (index < INT_GET(hdr->count, ARCH_CONVERT)) {
 		tmp  = INT_GET(hdr->count, ARCH_CONVERT) - index;
 		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
-		ovbcopy(entry, entry + 1, tmp);
+		memmove(entry + 1, entry, tmp);
 		xfs_da_log_buf(args->trans, bp,
 		    XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
 	}
@@ -986,7 +986,7 @@ xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
 	 */
 	namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
 	XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &namest->inumber, ARCH_CONVERT);
-	bcopy(args->name, namest->name, args->namelen);
+	memcpy(namest->name, args->name, args->namelen);
 	xfs_da_log_buf(args->trans, bp,
 	    XFS_DA_LOGRANGE(leaf, namest, XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)));
 
@@ -1029,7 +1029,7 @@ xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
 	lbsize = XFS_LBSIZE(mp);
 	tmpbuffer = kmem_alloc(lbsize, KM_SLEEP);
 	ASSERT(tmpbuffer != NULL);
-	bcopy(bp->data, tmpbuffer, lbsize);
+	memcpy(tmpbuffer, bp->data, lbsize);
 
 	/*
 	 * Make a second copy in case xfs_dir_leaf_moveents()
@@ -1037,9 +1037,9 @@ xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
 	 */
 	if (musthave || justcheck) {
 		tmpbuffer2 = kmem_alloc(lbsize, KM_SLEEP);
-		bcopy(bp->data, tmpbuffer2, lbsize);
+		memcpy(tmpbuffer2, bp->data, lbsize);
 	}
-	bzero(bp->data, lbsize);
+	memset(bp->data, 0, lbsize);
 
 	/*
 	 * Copy basic information
@@ -1072,7 +1072,7 @@ xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
 
 	if (justcheck || rval == ENOSPC) {
 		ASSERT(tmpbuffer2);
-		bcopy(tmpbuffer2, bp->data, lbsize);
+		memcpy(bp->data, tmpbuffer2, lbsize);
 	} else {
 		xfs_da_log_buf(trans, bp, 0, lbsize - 1);
 	}
@@ -1357,7 +1357,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
 		 * path point to the block we want to drop (this one).
 		 */
 		forward = !INT_ISZERO(info->forw, ARCH_CONVERT);
-		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
 		if (error)
@@ -1418,7 +1418,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
 	 * Make altpath point to the block we want to keep (the lower
 	 * numbered block) and path point to the block we want to drop.
 	 */
-	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	memcpy(&state->altpath, &state->path, sizeof(state->path));
 	if (blkno < blk->blkno) {
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
@@ -1538,17 +1538,17 @@ xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
 	 * Compress the remaining entries and zero out the removed stuff.
 	 */
 	namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
-	bzero((char *)namest, entsize);
+	memset((char *)namest, 0, entsize);
 	xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, namest, entsize));
 
 	INT_MOD(hdr->namebytes, ARCH_CONVERT, -(entry->namelen));
 	tmp = (INT_GET(hdr->count, ARCH_CONVERT) - index) * (uint)sizeof(xfs_dir_leaf_entry_t);
-	ovbcopy(entry + 1, entry, tmp);
+	memmove(entry, entry + 1, tmp);
 	INT_MOD(hdr->count, ARCH_CONVERT, -1);
 	xfs_da_log_buf(trans, bp,
 	    XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
 	entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)];
-	bzero((char *)entry, sizeof(xfs_dir_leaf_entry_t));
+	memset((char *)entry, 0, sizeof(xfs_dir_leaf_entry_t));
 
 	/*
 	 * If we removed the first entry, re-find the first used byte
@@ -1642,7 +1642,7 @@ xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 		 */
 		tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
 		ASSERT(tmpbuffer != NULL);
-		bzero(tmpbuffer, state->blocksize);
+		memset(tmpbuffer, 0, state->blocksize);
 		tmp_leaf = (xfs_dir_leafblock_t *)tmpbuffer;
 		tmp_hdr = &tmp_leaf->hdr;
 		tmp_hdr->info = save_hdr->info; /* struct copy */
@@ -1664,7 +1664,7 @@ xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 					      tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
 					      (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
 		}
-		bcopy(tmp_leaf, save_leaf, state->blocksize);
+		memcpy(save_leaf, tmp_leaf, state->blocksize);
 		kmem_free(tmpbuffer, state->blocksize);
 	}
 
@@ -1750,7 +1750,7 @@ xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
 		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
 		if (entry->namelen == args->namelen &&
 		    namest->name[0] == args->name[0] &&
-		    bcmp(args->name, namest->name, args->namelen) == 0) {
+		    memcmp(args->name, namest->name, args->namelen) == 0) {
 			XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &args->inumber, ARCH_CONVERT);
 			*index = probe;
 			return(XFS_ERROR(EEXIST));
@@ -1813,7 +1813,7 @@ xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
 		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
 		entry_s = &leaf_d->entries[start_d];
 		entry_d = &leaf_d->entries[start_d + count];
-		bcopy(entry_s, entry_d, tmp);
+		memmove(entry_d, entry_s, tmp);	/* same array: ranges may overlap */
 	}
 
 	/*
@@ -1831,11 +1831,11 @@ xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
 		INT_COPY(entry_d->nameidx, hdr_d->firstused, ARCH_CONVERT);
 		entry_d->namelen = entry_s->namelen;
 		ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
-		bcopy(XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
-		      XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)), tmp);
+		memcpy(XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)),
+		       XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)), tmp);
 		ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
-		bzero((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
-		      tmp);
+		memset((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
+		      0, tmp);
 		INT_MOD(hdr_s->namebytes, ARCH_CONVERT, -(entry_d->namelen));
 		INT_MOD(hdr_d->namebytes, ARCH_CONVERT, entry_d->namelen);
 		INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
@@ -1853,7 +1853,7 @@ xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
 		tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
 		entry_s = &leaf_s->entries[start_s];
 		ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
-		bzero((char *)entry_s, tmp);
+		memset((char *)entry_s, 0, tmp);
 	} else {
 		/*
 		 * Move the remaining entries down to fill the hole,
@@ -1863,12 +1863,12 @@ xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
 		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
 		entry_s = &leaf_s->entries[start_s + count];
 		entry_d = &leaf_s->entries[start_s];
-		bcopy(entry_s, entry_d, tmp);
+		memmove(entry_d, entry_s, tmp);	/* same array: ranges may overlap */
 
 		tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
 		entry_s = &leaf_s->entries[INT_GET(hdr_s->count, ARCH_CONVERT)];
 		ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
-		bzero((char *)entry_s, tmp);
+		memset((char *)entry_s, 0, tmp);
 	}
 
 	/*
@@ -2191,7 +2191,7 @@ xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa)
 	idbp->d_off = pa->cook.o;
 	idbp->d_name[namelen] = '\0';
 	pa->done = 1;
-	bcopy(pa->name, idbp->d_name, namelen);
+	memcpy(idbp->d_name, pa->name, namelen);
 	return 0;
 }
 
@@ -2217,7 +2217,7 @@ xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa)
 	idbp->d_ino = pa->ino;
 	idbp->d_off = pa->cook.o;
 	idbp->d_name[namelen] = '\0';
-	bcopy(pa->name, idbp->d_name, namelen);
+	memcpy(idbp->d_name, pa->name, namelen);
 	retval = uiomove((caddr_t)idbp, reclen, UIO_READ, uio);
 	pa->done = (retval == 0);
 	return retval;
diff --git a/fs/xfs/xfs_dir_sf.h b/fs/xfs/xfs_dir_sf.h
index ede171472223..10c60645f1fc 100644
--- a/fs/xfs/xfs_dir_sf.h
+++ b/fs/xfs/xfs_dir_sf.h
@@ -46,7 +46,7 @@ typedef struct { __uint8_t i[sizeof(xfs_ino_t)]; } xfs_dir_ino_t;
  * be calculated on the fly.
  *
  * Entries are packed toward the top as tight as possible.  The header
- * and the elements much be bcopy()'d out into a work area to get correct
+ * and the elements must be memcpy'd out into a work area to get correct
  * alignment for the inode number fields.
  */
 typedef struct xfs_dir_shortform {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index f67497d78935..fd1ae9b156b1 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -94,7 +94,7 @@ xfs_qm_dqinit(
 #endif
 	} else {
 		/*
-		 * Only the q_core portion was bzeroed in dqreclaim_one().
+		 * Only the q_core portion was zeroed in dqreclaim_one().
 		 * So, we need to reset others.
 		 */
 		 dqp->q_nrefs = 0;
@@ -156,7 +156,7 @@ xfs_qm_dqinit_core(
 	xfs_dqblk_t	 *d)
 {
 	/*
-	 * Caller has bzero'd the entire dquot 'chunk' already.
+	 * Caller has zero'd the entire dquot 'chunk' already.
 	 */
 	INT_SET(d->dd_diskdq.d_magic, ARCH_CONVERT, XFS_DQUOT_MAGIC);
 	INT_SET(d->dd_diskdq.d_version, ARCH_CONVERT, XFS_DQUOT_VERSION);
@@ -351,7 +351,7 @@ xfs_qm_init_dquot_blk(
 	 */
 	curid = id - (id % XFS_QM_DQPERBLK(mp));
 	ASSERT(curid >= 0);
-	bzero(d, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
+	memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
 	for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)
 		xfs_qm_dqinit_core(curid, type, d);
 	xfs_trans_dquot_buf(tp, bp,
@@ -614,7 +614,7 @@ xfs_qm_dqread(
 	}
 
 	/* copy everything from disk dquot to the incore dquot */
-	bcopy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
+	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
 	ASSERT(INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id);
 	xfs_qm_dquot_logitem_init(dqp);
 
@@ -1209,7 +1209,7 @@ xfs_qm_dqflush(
 	}
 
 	/* This is the only portion of data that needs to persist */
-	bcopy(&(dqp->q_core), ddqp, sizeof(xfs_disk_dquot_t));
+	memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));
 
 	/*
 	 * Clear the dirty field and remember the flush lsn for later use.
@@ -1475,7 +1475,7 @@ xfs_qm_dqpurge(
 	dqp->q_mount = NULL;;
 	dqp->q_hash = NULL;
 	dqp->dq_flags = XFS_DQ_INACTIVE;
-	bzero(&dqp->q_core, sizeof(dqp->q_core));
+	memset(&dqp->q_core, 0, sizeof(dqp->q_core));
 	xfs_dqfunlock(dqp);
 	xfs_dqunlock(dqp);
 	XFS_DQ_HASH_UNLOCK(thishash);
@@ -1585,7 +1585,7 @@ xfs_qm_dqcheck(
 	 */
 	ASSERT(id != -1);
 	ASSERT(flags & XFS_QMOPT_DQREPAIR);
-	bzero(ddq, sizeof(xfs_dqblk_t));
+	memset(ddq, 0, sizeof(xfs_dqblk_t));
 	xfs_qm_dqinit_core(id, type, (xfs_dqblk_t *)ddq);
 	return (errs);
 }
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index b5ceb0316aba..5e1e059f8fb0 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -69,9 +69,9 @@ char *	xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
 void
 xfs_error_test_init(void)
 {
-	bzero(xfs_etest, sizeof(xfs_etest));
-	bzero(xfs_etest_fsid, sizeof(xfs_etest_fsid));
-	bzero(xfs_etest_fsname, sizeof(xfs_etest_fsname));
+	memset(xfs_etest, 0, sizeof(xfs_etest));
+	memset(xfs_etest_fsid, 0, sizeof(xfs_etest_fsid));
+	memset(xfs_etest_fsname, 0, sizeof(xfs_etest_fsname));
 }
 
 int
@@ -84,7 +84,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression,
 	if (random() % randfactor)
 		return 0;
 
-	bcopy(fsidp, &fsid, sizeof(fsid_t));
+	memcpy(&fsid, fsidp, sizeof(fsid_t));
 
 	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
 		if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) {
@@ -105,7 +105,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
 	int len;
 	int64_t fsid;
 
-	bcopy(mp->m_fixedfsid, &fsid, sizeof(fsid_t));
+	memcpy(&fsid, mp->m_fixedfsid, sizeof(fsid_t));
 
 	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
 		if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
@@ -138,7 +138,7 @@ xfs_errortag_clear(int error_tag, xfs_mount_t *mp)
 	int i;
 	int64_t fsid;
 
-	bcopy(mp->m_fixedfsid, &fsid, sizeof(fsid_t));
+	memcpy(&fsid, mp->m_fixedfsid, sizeof(fsid_t));
 
 	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
 		if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
@@ -191,7 +191,7 @@ xfs_errortag_clearall(xfs_mount_t *mp)
 {
 	int64_t fsid;
 
-	bcopy(mp->m_fixedfsid, &fsid, sizeof(fsid_t));
+	memcpy(&fsid, mp->m_fixedfsid, sizeof(fsid_t));
 
 	return xfs_errortag_clearall_umount(fsid, mp->m_fsname, 1);
 }
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 7984d92618fd..e29586bbb4fa 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -427,9 +427,9 @@ typedef struct xfs_handle {
 				 - (char *) &(handle))			  \
 				 + (handle).ha_fid.xfs_fid_len)
 
-#define XFS_HANDLE_CMP(h1, h2)	bcmp(h1, h2, sizeof (xfs_handle_t))
+#define XFS_HANDLE_CMP(h1, h2)	memcmp(h1, h2, sizeof(xfs_handle_t))
 
-#define FSHSIZE		sizeof (fsid_t)
+#define FSHSIZE		sizeof(fsid_t)
 
 
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 4cd53ed2d791..cbb6ec5d0321 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -149,7 +149,7 @@ xfs_growfs_data_private(
 			sizeof(xfs_perag_t) * nagcount,
 			sizeof(xfs_perag_t) * oagcount,
 			KM_SLEEP);
-		bzero(&mp->m_perag[oagcount],
+		memset(&mp->m_perag[oagcount], 0,
 			(nagcount - oagcount) * sizeof(xfs_perag_t));
 		mp->m_flags |= XFS_MOUNT_32BITINODES;
 		xfs_initialize_perag(mp, nagcount);
@@ -175,7 +175,7 @@ xfs_growfs_data_private(
 				  disk_addr,
 				  sectbb, 0);
 		agf = XFS_BUF_TO_AGF(bp);
-		bzero(agf, mp->m_sb.sb_sectsize);
+		memset(agf, 0, mp->m_sb.sb_sectsize);
 		INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC);
 		INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION);
 		INT_SET(agf->agf_seqno, ARCH_CONVERT, agno);
@@ -208,7 +208,7 @@ xfs_growfs_data_private(
 				  disk_addr,
 				  sectbb, 0);
 		agi = XFS_BUF_TO_AGI(bp);
-		bzero(agi, mp->m_sb.sb_sectsize);
+		memset(agi, 0, mp->m_sb.sb_sectsize);
 		INT_SET(agi->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC);
 		INT_SET(agi->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION);
 		INT_SET(agi->agi_seqno, ARCH_CONVERT, agno);
@@ -233,7 +233,7 @@ xfs_growfs_data_private(
 			disk_addr,
 			BTOBB(bsize), 0);
 		block = XFS_BUF_TO_SBLOCK(bp);
-		bzero(block, bsize);
+		memset(block, 0, bsize);
 		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTB_MAGIC);
 		INT_ZERO(block->bb_level, ARCH_CONVERT);
 		INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
@@ -255,7 +255,7 @@ xfs_growfs_data_private(
 			disk_addr,
 			BTOBB(bsize), 0);
 		block = XFS_BUF_TO_SBLOCK(bp);
-		bzero(block, bsize);
+		memset(block, 0, bsize);
 		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTC_MAGIC);
 		INT_ZERO(block->bb_level, ARCH_CONVERT);
 		INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
@@ -278,7 +278,7 @@ xfs_growfs_data_private(
 			disk_addr,
 			BTOBB(bsize), 0);
 		block = XFS_BUF_TO_SBLOCK(bp);
-		bzero(block, bsize);
+		memset(block, 0, bsize);
 		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
 		INT_ZERO(block->bb_level, ARCH_CONVERT);
 		INT_ZERO(block->bb_numrecs, ARCH_CONVERT);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 185e62f08c70..b8f68d7c3605 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -263,7 +263,7 @@ xfs_ialloc_ag_alloc(
 		INT_ZERO(dic.di_gid, ARCH_CONVERT);
 		INT_ZERO(dic.di_nlink, ARCH_CONVERT);
 		INT_ZERO(dic.di_projid, ARCH_CONVERT);
-		bzero(&(dic.di_pad[0]),sizeof(dic.di_pad));
+		memset(&(dic.di_pad[0]), 0, sizeof(dic.di_pad));
 		INT_SET(dic.di_atime.t_sec, ARCH_CONVERT, ztime.t_sec);
 		INT_SET(dic.di_atime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
 
@@ -287,7 +287,7 @@ xfs_ialloc_ag_alloc(
 
 		for (i = 0; i < ninodes; i++) {
 			free = XFS_MAKE_IPTR(args.mp, fbuf, i);
-			bcopy (&dic, &(free->di_core), sizeof(xfs_dinode_core_t));
+			memcpy(&(free->di_core), &dic, sizeof(xfs_dinode_core_t));
 			INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
 			xfs_ialloc_log_di(tp, fbuf, i,
 				XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 0feaf85a5455..bded6cfd75f3 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -139,9 +139,9 @@ xfs_inobt_delrec(
 		}
 #endif
 		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
-			ovbcopy(&kp[ptr], &kp[ptr - 1],
+			memmove(&kp[ptr - 1], &kp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*kp));
-			ovbcopy(&pp[ptr], &pp[ptr - 1],
+			memmove(&pp[ptr - 1], &pp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*pp));
 			xfs_inobt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
 			xfs_inobt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
@@ -154,7 +154,7 @@ xfs_inobt_delrec(
 	else {
 		rp = XFS_INOBT_REC_ADDR(block, 1, cur);
 		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
-			ovbcopy(&rp[ptr], &rp[ptr - 1],
+			memmove(&rp[ptr - 1], &rp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*rp));
 			xfs_inobt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
 		}
@@ -450,8 +450,8 @@ xfs_inobt_delrec(
 				return error;
 		}
 #endif
-		bcopy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp));
-		bcopy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp));
+		memcpy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp));
+		memcpy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp));
 		xfs_inobt_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
 				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_inobt_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
@@ -462,7 +462,7 @@ xfs_inobt_delrec(
 		 */
 		lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
-		bcopy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
+		memcpy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
 		xfs_inobt_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
 				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
 	}
@@ -690,9 +690,9 @@ xfs_inobt_insrec(
 				return error;
 		}
 #endif
-		ovbcopy(&kp[ptr - 1], &kp[ptr],
+		memmove(&kp[ptr], &kp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp));
-		ovbcopy(&pp[ptr - 1], &pp[ptr],
+		memmove(&pp[ptr], &pp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp));
 		/*
 		 * Now stuff the new data in, bump numrecs and log the new data.
@@ -711,7 +711,7 @@ xfs_inobt_insrec(
 		 * It's a leaf entry.  Make a hole for the new record.
 		 */
 		rp = XFS_INOBT_REC_ADDR(block, 1, cur);
-		ovbcopy(&rp[ptr - 1], &rp[ptr],
+		memmove(&rp[ptr], &rp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
 		/*
 		 * Now stuff the new record in, bump numrecs
@@ -1170,12 +1170,12 @@ xfs_inobt_lshift(
 				return error;
 		}
 #endif
-		ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memmove(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 	} else {
-		ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
 		rkp = &key;
@@ -1421,8 +1421,8 @@ xfs_inobt_rshift(
 				return error;
 		}
 #endif
-		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 #ifdef DEBUG
 		if ((error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)))
 			return error;
@@ -1434,7 +1434,7 @@ xfs_inobt_rshift(
 	} else {
 		lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
-		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		*rrp = *lrp;
 		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
 		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
@@ -1562,8 +1562,8 @@ xfs_inobt_split(
 				return error;
 		}
 #endif
-		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		*keyp = *rkp;
@@ -1574,7 +1574,7 @@ xfs_inobt_split(
 	else {
 		lrp = XFS_INOBT_REC_ADDR(left, i, cur);
 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
-		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
 	}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 49d98172ec1a..5f14333dbaff 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -65,7 +65,7 @@ xfs_validate_extents(
 	xfs_bmbt_rec_t		rec;
 
 	for (i = 0; i < nrecs; i++) {
-		bcopy(ep, &rec, sizeof(rec));
+		memcpy(&rec, ep, sizeof(rec));
 		xfs_bmbt_get_all(&rec, &irec);
 		if (fmt == XFS_EXTFMT_NOSTATE)
 			ASSERT(irec.br_state == XFS_EXT_NORM);
@@ -551,7 +551,7 @@ xfs_iformat_local(
 	/*
 	 * If the size is unreasonable, then something
 	 * is wrong and we just bail out rather than crash in
-	 * kmem_alloc() or bcopy() below.
+	 * kmem_alloc() or memcpy() below.
 	 */
 	if (size > XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)) {
 		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
@@ -575,7 +575,8 @@ xfs_iformat_local(
 	ifp->if_bytes = size;
 	ifp->if_real_bytes = real_size;
 	if (size)
-		bcopy(XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), ifp->if_u1.if_data, size);
+		memcpy(ifp->if_u1.if_data,
+			XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), size);
 	ifp->if_flags &= ~XFS_IFEXTENTS;
 	ifp->if_flags |= XFS_IFINLINE;
 	return 0;
@@ -608,7 +609,7 @@ xfs_iformat_extents(
 	/*
 	 * If the number of extents is unreasonable, then something
 	 * is wrong and we just bail out rather than crash in
-	 * kmem_alloc() or bcopy() below.
+	 * kmem_alloc() or memcpy() below.
 	 */
 	if (size < 0 || size > XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)) {
 		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
@@ -634,8 +635,8 @@ xfs_iformat_extents(
 		xfs_validate_extents(
 			(xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT),
 			nex, XFS_EXTFMT_INODE(ip));
-		bcopy(XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), ifp->if_u1.if_extents,
-		      size);
+		memcpy(ifp->if_u1.if_extents,
+			XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), size);
 		xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex,
 			whichfork);
 		if (whichfork != XFS_DATA_FORK ||
@@ -731,9 +732,9 @@ xfs_xlate_dinode_core(xfs_caddr_t buf, xfs_dinode_core_t *dip,
 
     if (arch == ARCH_NOCONVERT) {
 	if (dir>0) {
-	    bcopy((xfs_caddr_t)buf_core, (xfs_caddr_t)mem_core, sizeof(xfs_dinode_core_t));
+	    memcpy((xfs_caddr_t)mem_core, (xfs_caddr_t)buf_core, sizeof(xfs_dinode_core_t));
 	} else {
-	    bcopy((xfs_caddr_t)mem_core, (xfs_caddr_t)buf_core, sizeof(xfs_dinode_core_t));
+	    memcpy((xfs_caddr_t)buf_core, (xfs_caddr_t)mem_core, sizeof(xfs_dinode_core_t));
 	}
 	return;
     }
@@ -749,9 +750,9 @@ xfs_xlate_dinode_core(xfs_caddr_t buf, xfs_dinode_core_t *dip,
     INT_XLATE(buf_core->di_projid,	mem_core->di_projid,	   dir, arch);
 
     if (dir>0) {
-	bcopy(buf_core->di_pad, mem_core->di_pad, sizeof(buf_core->di_pad));
+	memcpy(mem_core->di_pad, buf_core->di_pad, sizeof(buf_core->di_pad));
     } else {
-	bcopy(mem_core->di_pad, buf_core->di_pad, sizeof(buf_core->di_pad));
+	memcpy(buf_core->di_pad, mem_core->di_pad, sizeof(buf_core->di_pad));
     }
 
     INT_XLATE(buf_core->di_atime.t_sec, mem_core->di_atime.t_sec,  dir, arch);
@@ -1065,7 +1066,7 @@ xfs_ialloc(
 	ip->i_d.di_uid = current->fsuid;
 	ip->i_d.di_gid = current->fsgid;
 	ip->i_d.di_projid = prid;
-	bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
+	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 
 	/*
 	 * If the superblock version is up to where we support new format
@@ -2156,7 +2157,7 @@ xfs_iroot_realloc(
 		ifp->if_broot_bytes = (int)new_size;
 		ASSERT(ifp->if_broot_bytes <=
 			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
-		ovbcopy(op, np, cur_max * (uint)sizeof(xfs_dfsbno_t));
+		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
 		return;
 	}
 
@@ -2178,7 +2179,7 @@ xfs_iroot_realloc(
 		/*
 		 * First copy over the btree block header.
 		 */
-		bcopy(ifp->if_broot, new_broot, sizeof(xfs_bmbt_block_t));
+		memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t));
 	} else {
 		new_broot = NULL;
 		ifp->if_flags &= ~XFS_IFBROOT;
@@ -2195,7 +2196,7 @@ xfs_iroot_realloc(
 						     ifp->if_broot_bytes);
 		np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1,
 						     (int)new_size);
-		bcopy(op, np, new_max * (uint)sizeof(xfs_bmbt_rec_t));
+		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
 
 		/*
 		 * Then copy the pointers.
@@ -2204,7 +2205,7 @@ xfs_iroot_realloc(
 						     ifp->if_broot_bytes);
 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1,
 						     (int)new_size);
-		bcopy(op, np, new_max * (uint)sizeof(xfs_dfsbno_t));
+		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
 	}
 	kmem_free(ifp->if_broot, ifp->if_broot_bytes);
 	ifp->if_broot = new_broot;
@@ -2268,8 +2269,8 @@ xfs_iext_realloc(
 			 * so the if_extents pointer is null.
 			 */
 			if (ifp->if_u1.if_extents) {
-				bcopy(ifp->if_u1.if_extents,
-				      ifp->if_u2.if_inline_ext, new_size);
+				memcpy(ifp->if_u2.if_inline_ext,
+					ifp->if_u1.if_extents, new_size);
 				kmem_free(ifp->if_u1.if_extents,
 					  ifp->if_real_bytes);
 			}
@@ -2286,7 +2287,7 @@ xfs_iext_realloc(
 		if (ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext) {
 			ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
 				kmem_alloc(rnew_size, KM_SLEEP);
-			bcopy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
+			memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
 			      sizeof(ifp->if_u2.if_inline_ext));
 		} else if (rnew_size != ifp->if_real_bytes) {
 			ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
@@ -2349,7 +2350,7 @@ xfs_idata_realloc(
 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
 		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
 			ASSERT(ifp->if_real_bytes != 0);
-			bcopy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
+			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
 			      new_size);
 			kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
@@ -2382,8 +2383,8 @@ xfs_idata_realloc(
 		} else {
 			ASSERT(ifp->if_real_bytes == 0);
 			ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
-			bcopy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
-			      ifp->if_bytes);
+			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
+				ifp->if_bytes);
 		}
 	}
 	ifp->if_real_bytes = real_size;
@@ -2610,7 +2611,7 @@ xfs_iunpin_wait(
  * returns the number of bytes copied into the buffer.
  *
  * If there are no delayed allocation extents, then we can just
- * bcopy() the extents into the buffer.	 Otherwise, we need to
+ * memcpy() the extents into the buffer.  Otherwise, we need to
  * examine each extent in turn and skip those which are delayed.
  */
 int
@@ -2646,7 +2647,7 @@ xfs_iextents_copy(
 		ASSERT(ifp->if_bytes ==
 		       (XFS_IFORK_NEXTENTS(ip, whichfork) *
 			(uint)sizeof(xfs_bmbt_rec_t)));
-		bcopy(ifp->if_u1.if_extents, buffer, ifp->if_bytes);
+		memcpy(buffer, ifp->if_u1.if_extents, ifp->if_bytes);
 		xfs_validate_extents(buffer, nrecs, XFS_EXTFMT_INODE(ip));
 		return ifp->if_bytes;
 	}
@@ -2736,7 +2737,7 @@ xfs_iflush_fork(
 		    (ifp->if_bytes > 0)) {
 			ASSERT(ifp->if_u1.if_data != NULL);
 			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
-			bcopy(ifp->if_u1.if_data, cp, ifp->if_bytes);
+			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
 		}
 		if (whichfork == XFS_DATA_FORK) {
 			if (XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip)) {
@@ -2781,7 +2782,7 @@ xfs_iflush_fork(
 	case XFS_DINODE_FMT_UUID:
 		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
 			ASSERT(whichfork == XFS_DATA_FORK);
-			bcopy(&ip->i_df.if_u2.if_uuid, &dip->di_u.di_muuid,
+			memcpy(&dip->di_u.di_muuid, &ip->i_df.if_u2.if_uuid,
 				sizeof(uuid_t));
 		}
 		break;
@@ -3208,8 +3209,8 @@ xfs_iflush_int(
 			INT_SET(dip->di_core.di_version, ARCH_CONVERT, XFS_DINODE_VERSION_2);
 			ip->i_d.di_onlink = 0;
 			INT_ZERO(dip->di_core.di_onlink, ARCH_CONVERT);
-			bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
-			bzero(&(dip->di_core.di_pad[0]),
+			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
+			memset(&(dip->di_core.di_pad[0]), 0,
 			      sizeof(dip->di_core.di_pad));
 			ASSERT(ip->i_d.di_projid == 0);
 		}
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 9beacce25dec..234ef996a8f5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -96,7 +96,7 @@ xfs_inode_item_size(
 			if (iip->ili_root_size > 0) {
 				ASSERT(iip->ili_root_size ==
 				       ip->i_df.if_broot_bytes);
-				ASSERT(bcmp(iip->ili_orig_root,
+				ASSERT(memcmp(iip->ili_orig_root,
 					    ip->i_df.if_broot,
 					    iip->ili_root_size) == 0);
 			} else {
@@ -297,7 +297,7 @@ xfs_inode_item_format(
 			 */
 			ip->i_d.di_version = XFS_DINODE_VERSION_2;
 			ip->i_d.di_onlink = 0;
-			bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
+			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 		}
 	}
 
@@ -889,7 +889,7 @@ xfs_inode_item_init(
 	iip->ili_inode = ip;
 
 	/*
-	   We have bzeroed memory. No need ...
+	   We have zeroed memory. No need ...
 	   iip->ili_extents_buf = NULL;
 	   iip->ili_pushbuf_flag = 0;
 	 */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index d46f2ccf3844..8fe8097215df 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -146,7 +146,7 @@ xfs_bulkstat_one(
 	buf->bs_extsize = INT_GET(dic->di_extsize, arch) << mp->m_sb.sb_blocklog;
 	buf->bs_extents = INT_GET(dic->di_nextents, arch);
 	buf->bs_gen = INT_GET(dic->di_gen, arch);
-	bzero(buf->bs_pad, sizeof(buf->bs_pad));
+	memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
 	buf->bs_dmevmask = INT_GET(dic->di_dmevmask, arch);
 	buf->bs_dmstate = INT_GET(dic->di_dmstate, arch);
 	buf->bs_aextents = INT_GET(dic->di_anextents, arch);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 942d07386d9e..5115627ae9e2 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1586,8 +1586,8 @@ xlog_unalloc_log(xlog_t *log)
  *		1. If first write of transaction, write start record
  *		2. Write log operation header (header per region)
  *		3. Find out if we can fit entire region into this iclog
- *		4. Potentially, verify destination bcopy ptr
- *		5. Bcopy (partial) region
+ *		4. Potentially, verify destination memcpy ptr
+ *		5. Memcpy (partial) region
  *		6. If partial copy, release iclog; otherwise, continue
  *			copying more regions into current iclog
  *	4. Mark want sync bit (in simulation mode)
@@ -1628,8 +1628,8 @@ xlog_write(xfs_mount_t *	mp,
     int		     start_rec_copy; /* # bytes to copy for start record */
     int		     partial_copy;   /* did we split a region? */
     int		     partial_copy_len;/* # bytes copied if split region */
-    int		     need_copy;	     /* # bytes need to bcopy this region */
-    int		     copy_len;	     /* # bytes actually bcopy'ing */
+    int		     need_copy;	     /* # bytes needed to memcpy for this region */
+    int		     copy_len;	     /* # bytes actually memcpy'ing */
     int		     copy_off;	     /* # bytes from entry start */
     int		     contwr;	     /* continued write of in-core log? */
     int		     firstwr = 0;    /* first write of transaction */
@@ -1733,7 +1733,7 @@ xlog_write(xfs_mount_t *	mp,
 
 	    /* Partial write last time? => (partial_copy != 0)
 	     * need_copy is the amount we'd like to copy if everything could
-	     * fit in the current bcopy.
+	     * fit in the current memcpy.
 	     */
 	    need_copy = reg[index].i_len - partial_copy_len;
 
@@ -1759,7 +1759,7 @@ xlog_write(xfs_mount_t *	mp,
 
 	    /* copy region */
 	    ASSERT(copy_len >= 0);
-	    bcopy(reg[index].i_addr + copy_off, (xfs_caddr_t)ptr, copy_len);
+	    memcpy((xfs_caddr_t)ptr, reg[index].i_addr + copy_off, copy_len);
 	    xlog_write_adv_cnt(ptr, len, log_offset, copy_len);
 
 	    /* make copy_len total bytes copied, including headers */
@@ -1836,7 +1836,7 @@ xlog_state_clean_log(xlog_t *log)
 				changed = 2;
 			}
 			INT_ZERO(iclog->ic_header.h_num_logops, ARCH_CONVERT);
-			bzero(iclog->ic_header.h_cycle_data,
+			memset(iclog->ic_header.h_cycle_data, 0,
 			      sizeof(iclog->ic_header.h_cycle_data));
 			INT_ZERO(iclog->ic_header.h_lsn, ARCH_CONVERT);
 		} else if (iclog->ic_state == XLOG_STATE_ACTIVE)
@@ -2168,7 +2168,7 @@ xlog_state_done_syncing(
 
 
 /*
- * Update counters atomically now that bcopy is done.
+ * Update counters atomically now that memcpy is done.
  */
 /* ARGSUSED */
 static inline void
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7c18ed89857d..558cec451a47 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1252,7 +1252,7 @@ xlog_recover_add_to_cont_trans(xlog_recover_t	*trans,
 		/* finish copying rest of trans header */
 		xlog_recover_add_item(&trans->r_itemq);
 		ptr = (xfs_caddr_t)&trans->r_theader+sizeof(xfs_trans_header_t)-len;
-		bcopy(dp, ptr, len); /* s, d, l */
+		memcpy(ptr, dp, len); /* d, s, l */
 		return 0;
 	}
 	item = item->ri_prev;
@@ -1261,7 +1261,7 @@ xlog_recover_add_to_cont_trans(xlog_recover_t	*trans,
 	old_len = item->ri_buf[item->ri_cnt-1].i_len;
 
 	ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);
-	bcopy(dp , &ptr[old_len], len); /* s, d, l */
+	memcpy(&ptr[old_len], dp, len); /* d, s, l */
 	item->ri_buf[item->ri_cnt-1].i_len += len;
 	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
 	return 0;
@@ -1292,7 +1292,7 @@ xlog_recover_add_to_trans(xlog_recover_t	*trans,
 	if (!len)
 		return 0;
 	ptr = kmem_zalloc(len, 0);
-	bcopy(dp, ptr, len);
+	memcpy(ptr, dp, len);
 
 	in_f = (xfs_inode_log_format_t *)ptr;
 	item = trans->r_itemq;
@@ -1300,7 +1300,7 @@ xlog_recover_add_to_trans(xlog_recover_t	*trans,
 		ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
 		if (len == sizeof(xfs_trans_header_t))
 			xlog_recover_add_item(&trans->r_itemq);
-		bcopy(dp, &trans->r_theader, len); /* s, d, l */
+		memcpy(&trans->r_theader, dp, len); /* d, s, l */
 		return 0;
 	}
 	if (item->ri_prev->ri_total != 0 &&
@@ -1809,9 +1809,10 @@ xlog_recover_do_reg_buffer(xfs_mount_t		*mp,
 					       "dquot_buf_recover");
 		}
 		if (!error)
-		    bcopy(item->ri_buf[i].i_addr,		   /* source */
-		      xfs_buf_offset(bp, (uint)bit << XFS_BLI_SHIFT), /* dest */
-		      nbits<<XFS_BLI_SHIFT);			   /* length */
+			memcpy(xfs_buf_offset(bp,
+					(uint)bit << XFS_BLI_SHIFT),	/* dest */
+				item->ri_buf[i].i_addr,			/* source */
+				nbits<<XFS_BLI_SHIFT);			/* length */
 		i++;
 		bit += nbits;
 	}
@@ -2125,9 +2126,9 @@ xlog_recover_do_inode_trans(xlog_t		*log,
 			      -1, ARCH_CONVERT);
 	/* the rest is in on-disk format */
 	if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) {
-		bcopy(item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t),
-		      (xfs_caddr_t) dip		 + sizeof(xfs_dinode_core_t),
-		      item->ri_buf[1].i_len  - sizeof(xfs_dinode_core_t));
+		memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t),
+			item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t),
+			item->ri_buf[1].i_len  - sizeof(xfs_dinode_core_t));
 	}
 
 	fields = in_f->ilf_fields;
@@ -2153,7 +2154,7 @@ xlog_recover_do_inode_trans(xlog_t		*log,
 	switch (fields & XFS_ILOG_DFORK) {
 	case XFS_ILOG_DDATA:
 	case XFS_ILOG_DEXT:
-		bcopy(src, &dip->di_u, len);
+		memcpy(&dip->di_u, src, len);
 		break;
 
 	case XFS_ILOG_DBROOT:
@@ -2192,7 +2193,7 @@ xlog_recover_do_inode_trans(xlog_t		*log,
 		case XFS_ILOG_AEXT:
 			dest = XFS_DFORK_APTR(dip);
 			ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
-			bcopy(src, dest, len);
+			memcpy(dest, src, len);
 			break;
 
 		case XFS_ILOG_ABROOT:
@@ -2351,7 +2352,7 @@ xlog_recover_do_dquot_trans(xlog_t		*log,
 		return XFS_ERROR(EIO);
 	}
 
-	bcopy(recddq, ddq, item->ri_buf[1].i_len);
+	memcpy(ddq, recddq, item->ri_buf[1].i_len);
 
 	ASSERT(dq_f->qlf_size == 2);
 	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
@@ -2392,7 +2393,7 @@ xlog_recover_do_efi_trans(xlog_t		*log,
 
 	mp = log->l_mp;
 	efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
-	bcopy((char *)efi_formatp, (char *)&(efip->efi_format),
+	memcpy((char *)&(efip->efi_format), (char *)efi_formatp,
 	      sizeof(xfs_efi_log_format_t) +
 	      ((efi_formatp->efi_nextents - 1) * sizeof(xfs_extent_t)));
 	efip->efi_next_extent = efi_formatp->efi_nextents;
@@ -3225,7 +3226,7 @@ xlog_do_recovery_pass(xlog_t	*log,
 	return ENOMEM;
     }
 
-    bzero(rhash, sizeof(rhash));
+    memset(rhash, 0, sizeof(rhash));
     if (tail_blk <= head_blk) {
 	for (blk_no = tail_blk; blk_no < head_blk; ) {
 	    if ((error = xlog_bread(log, blk_no, hblks, hbp)))
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 74b345fd3669..2d0203b6836f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -382,9 +382,9 @@ xfs_xlatesb(
 		    size == 1 ||
 		    xfs_sb_info[f].type == 1) {
 			if (dir > 0) {
-				bcopy(buf_ptr + first, mem_ptr + first, size);
+				memcpy(mem_ptr + first, buf_ptr + first, size);
 			} else {
-				bcopy(mem_ptr + first, buf_ptr + first, size);
+				memcpy(buf_ptr + first, mem_ptr + first, size);
 			}
 		} else {
 			switch (size) {
@@ -680,7 +680,7 @@ xfs_mountfs(
 		}
 		uuid_mounted=1;
 		ret64 = uuid_hash64(&sbp->sb_uuid);
-		bcopy(&ret64, &vfsp->vfs_fsid, sizeof(ret64));
+		memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
 	}
 
 	/*
@@ -1139,7 +1139,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 	/*
 	 * clear all error tags on this filesystem
 	 */
-	bcopy(&(XFS_MTOVFS(mp)->vfs_fsid), &fsid, sizeof(int64_t));
+	memcpy(&fsid, &(XFS_MTOVFS(mp)->vfs_fsid), sizeof(int64_t));
 	(void) xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
 #endif
 
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index afeb5cce9b0c..2256b5d8bb0b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1427,7 +1427,7 @@ xfs_qm_qino_alloc(
 		xfs_trans_cancel(tp, 0);
 		return (error);
 	}
-	bzero(&zerocr, sizeof(zerocr));
+	memset(&zerocr, 0, sizeof(zerocr));
 
 	if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, IFREG, 1, 0,
 				   &zerocr, 0, 1, ip, &committed))) {
@@ -2417,11 +2417,11 @@ xfs_qm_dqalloc_incore(
 		if ((dqp = xfs_qm_dqreclaim_one())) {
 			XFS_STATS_INC(xfsstats.xs_qm_dqreclaims);
 			/*
-			 * Just bzero the core here. The rest will get
+			 * Just zero the core here. The rest will get
 			 * reinitialized by caller. XXX we shouldn't even
-			 * do this bzero ...
+			 * do this zeroing ...
 			 */
-			bzero(&dqp->q_core, sizeof(dqp->q_core));
+			memset(&dqp->q_core, 0, sizeof(dqp->q_core));
 			*O_dqpp = dqp;
 			return (B_FALSE);
 		}
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 73f22a296ec8..976aed6b46dd 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -550,7 +550,7 @@ xfs_qm_scall_getqstat(
 
 	uip = gip = NULL;
 	tempuqip = tempgqip = B_FALSE;
-	bzero(out, sizeof(fs_quota_stat_t));
+	memset(out, 0, sizeof(fs_quota_stat_t));
 
 	out->qs_version = FS_QSTAT_VERSION;
 	if (! XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
@@ -885,7 +885,7 @@ xfs_qm_export_dquot(
 	xfs_disk_dquot_t	*src,
 	struct fs_disk_quota	*dst)
 {
-	bzero(dst, sizeof(*dst));
+	memset(dst, 0, sizeof(*dst));
 	dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
 	dst->d_flags =
 		xfs_qm_export_qtype_flags(INT_GET(src->d_flags, ARCH_CONVERT));
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 82684350d6eb..6ddf6e0035d5 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -170,7 +170,7 @@ xfs_growfs_rt_alloc(
 				error = XFS_ERROR(EIO);
 				goto error_exit;
 			}
-			bzero(XFS_BUF_PTR(bp), mp->m_sb.sb_blocksize);
+			memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize);
 			xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 			/*
 			 * Commit the transaction.
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index e6bcf374e54c..f187135a1cef 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -880,7 +880,7 @@ xfs_trans_binval(
 	bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY);
 	bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
 	bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
-	bzero((char *)(bip->bli_format.blf_data_map),
+	memset((char *)(bip->bli_format.blf_data_map), 0,
 	      (bip->bli_format.blf_map_size * sizeof(uint)));
 	lidp->lid_flags |= XFS_LID_DIRTY;
 	tp->t_flags |= XFS_TRANS_DIRTY;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index b7fbf81b00da..0df7fc3075cf 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -414,20 +414,8 @@ xfs_trans_inode_broot_debug(
 		iip->ili_root_size = ip->i_df.if_broot_bytes;
 		iip->ili_orig_root =
 			(char*)kmem_alloc(iip->ili_root_size, KM_SLEEP);
-		bcopy((char*)(ip->i_df.if_broot), iip->ili_orig_root,
+		memcpy(iip->ili_orig_root, (char*)(ip->i_df.if_broot),
 		      iip->ili_root_size);
 	}
 }
 #endif
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 54ed6fec00a5..d82bf3a6a7be 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -345,7 +345,7 @@ xfs_bump_ino_vers2(
 
 	ip->i_d.di_version = XFS_DINODE_VERSION_2;
 	ip->i_d.di_onlink = 0;
-	bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
+	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 	mp = tp->t_mountp;
 	if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
 		s = XFS_SB_LOCK(mp);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 4f97135ba1ef..9b76c90bb544 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3702,7 +3702,7 @@ xfs_symlink(
 	 */
 	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
 		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
-		bcopy(target_path, ip->i_df.if_u1.if_data, pathlen);
+		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
 		ip->i_d.di_size = pathlen;
 
 		/*
@@ -3743,7 +3743,7 @@ xfs_symlink(
 			}
 			pathlen -= byte_cnt;
 
-			bcopy(cur_chunk, XFS_BUF_PTR(bp), byte_cnt);
+			memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
 			cur_chunk += byte_cnt;
 
 			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
@@ -3859,10 +3859,10 @@ xfs_fid2(
 	xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len);
 	xfid->fid_pad = 0;
 	/*
-	 * use bcopy because the inode is a long long and there's no
+	 * use memcpy because the inode is a long long and there's no
 	 * assurance that xfid->fid_ino is properly aligned.
 	 */
-	bcopy(&ip->i_ino, &xfid->fid_ino, sizeof xfid->fid_ino);
+	memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino));
 	xfid->fid_gen = ip->i_d.di_gen;
 
 	return 0;
@@ -4504,9 +4504,9 @@ xfs_zero_remaining_bytes(
 					  mp, bp, XFS_BUF_ADDR(bp));
 			break;
 		}
-		bzero(XFS_BUF_PTR(bp) +
+		memset(XFS_BUF_PTR(bp) +
 			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
-		      lastoffset - offset + 1);
+		      0, lastoffset - offset + 1);
 		XFS_BUF_UNDONE(bp);
 		XFS_BUF_UNREAD(bp);
 		XFS_BUF_WRITE(bp);
-- 
cgit v1.2.3


From 9d4513185df5b595b217781231883a5255b71f6e Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 15 Oct 2002 03:08:08 +0200
Subject: XFS: remove a no-longer-used conditional macro.

Modid: 2.5.x-xfs:slinx:129574a
---
 fs/xfs/xfs_types.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 9bc2d90d426f..e08f8b727a65 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -308,13 +308,9 @@ extern struct xfsstats xfsstats;
 #endif	/* !CONFIG_PROC_FS */
 
 
-
-/* juggle IRIX device numbers - still used in ondisk structures */
-
-#ifndef __KERNEL__
-#define MKDEV(major, minor)	makedev(major, minor)
-#endif
-
+/*
+ * Juggle IRIX device numbers - still used in ondisk structures
+ */
 #define XFS_DEV_BITSMAJOR	14
 #define XFS_DEV_BITSMINOR	18
 #define XFS_DEV_MAXMAJ		0x1ff
-- 
cgit v1.2.3


From 34f960bcdc758f99ea37935323fe3906a856f17d Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 03:08:58 +0200
Subject: XFS: Re-sync pagebuf flags in xfsidbg (missed last time...)

Modid: 2.5.x-xfs:slinx:129660a
---
 fs/xfs/xfsidbg.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
index 3f7ffef71667..5c70b8a77c7e 100644
--- a/fs/xfs/xfsidbg.c
+++ b/fs/xfs/xfsidbg.c
@@ -1703,10 +1703,10 @@ static char	*pb_flag_vals[] = {
 /*  0 */ "READ", "WRITE", "MAPPED", "PARTIAL", "ASYNC",
 /*  5 */ "NONE", "DELWRI", "FREED", "SYNC", "MAPPABLE",
 /* 10 */ "STALE", "FS_MANAGED", "INVALID12", "LOCK", "TRYLOCK",
-/* 15 */ "FILE_ALLOCATE", "DONT_BLOCK", "DIRECT", "INVALID18", "INVALID19",
-/* 20 */ "LOCKABLE", "PRIVATE_BH", "ALL_PAGES_MAPPED", "ADDR_ALLOCATED",
-	 "MEM_ALLOCATED",
-/* 25 */ "FORCEIO", "FLUSH", "READ_AHEAD", "INVALID28", "INVALID29",
+/* 15 */ "FILE_ALLOCATE", "DONT_BLOCK", "DIRECT", "INVALID18", "LOCKABLE",
+/* 20 */ "PRIVATE_BH", "ALL_PAGES_MAPPED", "ADDR_ALLOCATED", "MEM_ALLOCATED",
+	 "FORCEIO",
+/* 25 */ "FLUSH", "READ_AHEAD", "INVALID27", "INVALID28", "INVALID29", 
 /* 30 */ "INVALID30", "INVALID31",
 	 NULL };
 
-- 
cgit v1.2.3


From bd8446b801b93a8d5a748189797c360701c06dcc Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 03:10:09 +0200
Subject: XFS: bring the 32 bit inode flag back into line with the Irix
 version.

Modid: 2.5.x-xfs:slinx:128653a
---
 fs/xfs/linux/xfs_super.c | 3 +++
 fs/xfs/xfs_clnt.h        | 6 +++---
 fs/xfs/xfs_vfsops.c      | 3 +--
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c
index 5c32e8c489fb..27be367ec316 100644
--- a/fs/xfs/linux/xfs_super.c
+++ b/fs/xfs/linux/xfs_super.c
@@ -121,6 +121,9 @@ xfs_parseargs(
 
 	iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
 
+	/* Default to 32 bit inodes on linux all the time */
+	args->flags |= XFSMNT_32BITINODES;
+
 	/* Copy the already-parsed mount(2) flags we're interested in */
 	if (flags & MS_NOATIME)
 		args->flags |= XFSMNT_NOATIME;
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 05df613952c6..fd7d80ebe2ef 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -112,13 +112,13 @@ struct xfs_mount_args {
 						/* only) */
 #define XFSMNT_NOTSERVER	0x00100000	/* give up being the server */
 						/* (remount only) */
-#define XFSMNT_DMAPI		0x00200000	/* enable dmapi/xdsm */
+#define XFSMNT_32BITINODES	0x00200000	/* restrict inodes to 32
+						 * bits of address space */
 #define XFSMNT_GQUOTA		0x00400000	/* group quota accounting */
 #define XFSMNT_GQUOTAENF	0x00800000	/* group quota limit
 						 * enforcement */
 #define XFSMNT_NOUUID		0x01000000	/* Ignore fs uuid */
-#define XFSMNT_32BITINODES	0x02000000	/* restrict inodes to 32
-						 * bits of address space */
+#define XFSMNT_DMAPI		0x02000000	/* enable dmapi/xdsm */
 #define XFSMNT_NOLOGFLUSH	0x04000000	/* Don't flush for log blocks */
 
 /* Did we get any args for CXFS to consume? */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 3604040fe8bf..bcd2efd1ce0d 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -274,8 +274,7 @@ xfs_start_flags(
 	if (ap->flags & XFSMNT_OSYNCISOSYNC)
 		mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
 
-	/* Default on Linux */
-	if (1 || ap->flags & XFSMNT_32BITINODES)
+	if (ap->flags & XFSMNT_32BITINODES)
 		mp->m_flags |= XFS_MOUNT_32BITINODES;
 
 	if (ap->flags & XFSMNT_IOSIZE) {
-- 
cgit v1.2.3


From 4b3a71ed01e4293b1aa8f268e227d44fd31a5b06 Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 03:12:04 +0200
Subject: XFS: remove some bit shifting constants we do not use

Modid: 2.5.x-xfs:slinx:128887a
---
 fs/xfs/xfs_fs.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index e29586bbb4fa..0311d1258076 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -498,13 +498,5 @@ typedef struct xfs_handle {
 #define BTOBB(bytes)	(((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT)
 #define BTOBBT(bytes)	((__u64)(bytes) >> BBSHIFT)
 #define BBTOB(bbs)	((bbs) << BBSHIFT)
-#define OFFTOBB(bytes)	(((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT)
-#define OFFTOBBT(bytes) ((__u64)(bytes) >> BBSHIFT)
-#define BBTOOFF(bbs)	((__u64)(bbs) << BBSHIFT)
-
-#define SEEKLIMIT32	0x7fffffff
-#define BBSEEKLIMIT32	BTOBBT(SEEKLIMIT32)
-#define SEEKLIMIT	0x7fffffffffffffffLL
-#define BBSEEKLIMIT	OFFTOBBT(SEEKLIMIT)
 
 #endif	/* _LINUX_XFS_FS_H */
-- 
cgit v1.2.3


From 1598e9c357bbedfd8f4d6c6269ac2eaa09a76faf Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 03:13:31 +0200
Subject: XFS: remove some 'temporary debugging code'

(has been there a couple of years at least)

Modid: 2.5.x-xfs:slinx:128918a
---
 fs/xfs/xfs_bmap.c | 60 -------------------------------------------------------
 1 file changed, 60 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 44e3aa012c17..d947d23fa19f 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4373,28 +4373,9 @@ xfs_bmap_read_extents(
 			break;
 		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
 			1, mp->m_bmap_dmxr[1]);
-#ifndef __KERNEL__
 		XFS_WANT_CORRUPTED_GOTO(
 			XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)),
 			error0);
-#else	/* additional, temporary, debugging code */
-		if (!(XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)))) {
-			cmn_err(CE_NOTE,
-			"xfs_bmap_read_extents: FSB Sanity Check:");
-			if (!(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount))
-				cmn_err(CE_NOTE,
-					"bad AG count %d < agcount %d",
-					XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)),
-					mp->m_sb.sb_agcount);
-			if (!(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks))
-				cmn_err(CE_NOTE,
-					"bad AG BNO %d < %d",
-					XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)),
-					mp->m_sb.sb_agblocks);
-			error = XFS_ERROR(EFSCORRUPTED);
-			goto error0;
-		}
-#endif
 		bno = INT_GET(*pp, ARCH_CONVERT);
 		xfs_trans_brelse(tp, bp);
 	}
@@ -4422,35 +4403,9 @@ xfs_bmap_read_extents(
 				(unsigned long long) ip->i_ino);
 			goto error0;
 		}
-#ifndef __KERNEL__
 		XFS_WANT_CORRUPTED_GOTO(
 			XFS_BMAP_SANITY_CHECK(mp, block, 0),
 			error0);
-#else	/* additional, temporary, debugging code */
-		if (!(XFS_BMAP_SANITY_CHECK(mp, block, 0))) {
-			cmn_err(CE_NOTE,
-			"xfs_bmap_read_extents: BMAP Sanity Check:");
-			if (!(INT_GET(block->bb_magic, ARCH_CONVERT) == XFS_BMAP_MAGIC))
-				cmn_err(CE_NOTE,
-					"bb_magic 0x%x",
-					INT_GET(block->bb_magic, ARCH_CONVERT));
-			if (!(INT_GET(block->bb_level, ARCH_CONVERT) == level))
-				cmn_err(CE_NOTE,
-					"bb_level %d",
-					INT_GET(block->bb_level, ARCH_CONVERT));
-			if (!(INT_GET(block->bb_numrecs, ARCH_CONVERT) > 0))
-				cmn_err(CE_NOTE,
-					"bb_numrecs %d",
-					INT_GET(block->bb_numrecs, ARCH_CONVERT));
-			if (!(INT_GET(block->bb_numrecs, ARCH_CONVERT) <= (mp)->m_bmap_dmxr[(level) != 0]))
-				cmn_err(CE_NOTE,
-					"bb_numrecs %d < m_bmap_dmxr[] %d",
-					INT_GET(block->bb_numrecs, ARCH_CONVERT),
-					(mp)->m_bmap_dmxr[(level) != 0]);
-			error = XFS_ERROR(EFSCORRUPTED);
-			goto error0;
-		}
-#endif
 		/*
 		 * Read-ahead the next leaf block, if any.
 		 */
@@ -4650,11 +4605,6 @@ xfs_bmapi(
 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) {
-#ifdef __KERNEL__	/* additional, temporary, debugging code */
-		cmn_err(CE_NOTE,
-			"EFSCORRUPTED returned from file %s line %d",
-			__FILE__, __LINE__);
-#endif
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 	mp = ip->i_mount;
@@ -5150,11 +5100,6 @@ xfs_bmapi_single(
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) {
-#ifdef __KERNEL__	/* additional, temporary, debugging code */
-		cmn_err(CE_NOTE,
-			"EFSCORRUPTED returned from file %s line %d",
-			__FILE__, __LINE__);
-#endif
 	       return XFS_ERROR(EFSCORRUPTED);
 	}
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -5228,11 +5173,6 @@ xfs_bunmapi(
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
-#ifdef __KERNEL__	/* additional, temporary, debugging code */
-		cmn_err(CE_NOTE,
-			"EFSCORRUPTED returned from file %s line %d",
-			__FILE__, __LINE__);
-#endif
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 	mp = ip->i_mount;
-- 
cgit v1.2.3


From 37183a5f29396b9c039d2a9303366e337a700258 Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 03:16:03 +0200
Subject: XFS: Switch to native endian internal representation for extents

Switch xfs from using a big endian internal representation for
the in memory copy of extents to a host byte order representation.
The internal extents are read in once, then modified separately
from the on disk ones. Since we search and manipulate the extents
multiple times, it is cheaper to convert them to host byte order
once and then keep them in that format. Worth about 5 to 10%
reduction in cpu time for some loads. Complicated by the fact
that the in memory extents are written out to the log sometimes,
and when expanding extents are used to write out the initial
block of extents.

Modid: 2.5.x-xfs:slinx:129646a
---
 fs/xfs/xfs_bmap.c       |  28 ++---
 fs/xfs/xfs_bmap_btree.c | 273 +++++++++++++++++++++++++++++++++++++++---------
 fs/xfs/xfs_bmap_btree.h |  55 ++++++++++
 fs/xfs/xfs_btree.c      |   6 +-
 fs/xfs/xfs_inode.c      |  66 ++++++------
 fs/xfs/xfs_inode.h      |   2 +-
 fs/xfs/xfs_inode_item.c |  36 ++++++-
 fs/xfs/xfs_inode_item.h |   5 +-
 fs/xfs/xfsidbg.c        |   2 +-
 9 files changed, 371 insertions(+), 102 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index d947d23fa19f..d89a4a83d611 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3174,7 +3174,7 @@ xfs_bmap_extents_to_btree(
 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
 	xfs_bmbt_rec_t		*ep;		/* extent list pointer */
 	int			error;		/* error return value */
-	xfs_extnum_t		i;		/* extent list index */
+	xfs_extnum_t		i, cnt;		/* extent list index */
 	xfs_ifork_t		*ifp;		/* inode fork pointer */
 	xfs_bmbt_key_t		*kp;		/* root block key pointer */
 	xfs_mount_t		*mp;		/* mount structure */
@@ -3256,24 +3256,25 @@ xfs_bmap_extents_to_btree(
 	ablock = XFS_BUF_TO_BMBT_BLOCK(abp);
 	INT_SET(ablock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
 	INT_ZERO(ablock->bb_level, ARCH_CONVERT);
-	INT_ZERO(ablock->bb_numrecs, ARCH_CONVERT);
 	INT_SET(ablock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
 	INT_SET(ablock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
 	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	for (ep = ifp->if_u1.if_extents, i = 0; i < nextents; i++, ep++) {
+	for (ep = ifp->if_u1.if_extents, cnt = i = 0; i < nextents; i++, ep++) {
 		if (!ISNULLSTARTBLOCK(xfs_bmbt_get_startblock(ep))) {
-			*arp++ = *ep;
-			INT_MOD(ablock->bb_numrecs, ARCH_CONVERT, +1);
+			arp->l0 = INT_GET(ep->l0, ARCH_CONVERT);
+			arp->l1 = INT_GET(ep->l1, ARCH_CONVERT);
+			arp++; cnt++;
 		}
 	}
+	INT_SET(ablock->bb_numrecs, ARCH_CONVERT, cnt);
 	ASSERT(INT_GET(ablock->bb_numrecs, ARCH_CONVERT) == XFS_IFORK_NEXTENTS(ip, whichfork));
 	/*
 	 * Fill in the root key and pointer.
 	 */
 	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
 	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
-	INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(arp));
+	INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(arp));
 	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
 	INT_SET(*pp, ARCH_CONVERT, args.fsbno);
 	/*
@@ -4332,7 +4333,7 @@ xfs_bmap_read_extents(
 #ifdef XFS_BMAP_TRACE
 	static char		fname[] = "xfs_bmap_read_extents";
 #endif
-	xfs_extnum_t		i;	/* index into the extents list */
+	xfs_extnum_t		i, j;	/* index into the extents list */
 	xfs_ifork_t		*ifp;	/* fork structure */
 	int			level;	/* btree level, for checking */
 	xfs_mount_t		*mp;	/* file system mount structure */
@@ -4389,7 +4390,7 @@ xfs_bmap_read_extents(
 	 * Loop over all leaf nodes.  Copy information to the extent list.
 	 */
 	for (;;) {
-		xfs_bmbt_rec_t	*frp;
+		xfs_bmbt_rec_t	*frp, *temp;
 		xfs_fsblock_t	nextbno;
 		xfs_extnum_t	num_recs;
 
@@ -4417,18 +4418,21 @@ xfs_bmap_read_extents(
 		 */
 		frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
 			block, 1, mp->m_bmap_dmxr[0]);
-		memcpy(trp, frp, num_recs * sizeof(*frp));
+		temp = trp;
+		for (j = 0; j < num_recs; j++, frp++, trp++) {
+			trp->l0 = INT_GET(frp->l0, ARCH_CONVERT);
+			trp->l1 = INT_GET(frp->l1, ARCH_CONVERT);
+		}
 		if (exntf == XFS_EXTFMT_NOSTATE) {
 			/*
 			 * Check all attribute bmap btree records and
 			 * any "older" data bmap btree records for a
 			 * set bit in the "extent flag" position.
 			 */
-			if (xfs_check_nostate_extents(trp, num_recs)) {
+			if (xfs_check_nostate_extents(temp, num_recs)) {
 				goto error0;
 			}
 		}
-		trp += num_recs;
 		i += num_recs;
 		xfs_trans_brelse(tp, bp);
 		bno = nextbno;
@@ -6257,7 +6261,7 @@ xfs_bmap_count_leaves(
 	int		b;
 
 	for ( b = 1; b <= numrecs; b++, frp++)
-		*count += xfs_bmbt_get_blockcount(frp);
+		*count += xfs_bmbt_disk_get_blockcount(frp);
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 4e41699fbb63..f13181dd97c8 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -207,7 +207,7 @@ xfs_bmbt_trace_argifr(
 	xfs_bmbt_irec_t		s;
 
 	d = (xfs_dfsbno_t)f;
-	xfs_bmbt_get_all(r, &s);
+	xfs_bmbt_disk_get_all(r, &s);
 	o = (xfs_dfiloff_t)s.br_startoff;
 	b = (xfs_dfsbno_t)s.br_startblock;
 	c = s.br_blockcount;
@@ -396,7 +396,7 @@ xfs_bmbt_delrec(
 			xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1);
 		}
 		if (ptr == 1) {
-			INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rp));
+			INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(rp));
 			kp = &key;
 		}
 	}
@@ -711,10 +711,10 @@ xfs_bmbt_get_rec(
 		return 0;
 	}
 	rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
-	*off = xfs_bmbt_get_startoff(rp);
-	*bno = xfs_bmbt_get_startblock(rp);
-	*len = xfs_bmbt_get_blockcount(rp);
-	*state = xfs_bmbt_get_state(rp);
+	*off = xfs_bmbt_disk_get_startoff(rp);
+	*bno = xfs_bmbt_disk_get_startblock(rp);
+	*len = xfs_bmbt_disk_get_blockcount(rp);
+	*state = xfs_bmbt_disk_get_state(rp);
 	*stat = 1;
 	return 0;
 }
@@ -757,7 +757,8 @@ xfs_bmbt_insrec(
 	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
 	XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
 	ncur = (xfs_btree_cur_t *)0;
-	INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(recp));
+	INT_SET(key.br_startoff, ARCH_CONVERT,
+		xfs_bmbt_disk_get_startoff(recp));
 	optr = ptr = cur->bc_ptrs[level];
 	if (ptr == 0) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
@@ -835,7 +836,7 @@ xfs_bmbt_insrec(
 						}
 #endif
 						ptr = cur->bc_ptrs[level];
-						xfs_bmbt_set_allf(&nrec,
+						xfs_bmbt_disk_set_allf(&nrec,
 							nkey.br_startoff, 0, 0,
 							XFS_EXT_NORM);
 					} else {
@@ -1175,7 +1176,7 @@ xfs_bmbt_lookup(
 					startoff = INT_GET(kkp->br_startoff, ARCH_CONVERT);
 				} else {
 					krp = krbase + keyno - 1;
-					startoff = xfs_bmbt_get_startoff(krp);
+					startoff = xfs_bmbt_disk_get_startoff(krp);
 				}
 				diff = (xfs_sfiloff_t)
 						(startoff - rp->br_startoff);
@@ -1356,7 +1357,8 @@ xfs_bmbt_lshift(
 	} else {
 		memmove(rrp, rrp + 1, rrecs * sizeof(*rrp));
 		xfs_bmbt_log_recs(cur, rbp, 1, rrecs);
-		INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
+		INT_SET(key.br_startoff, ARCH_CONVERT,
+			xfs_bmbt_disk_get_startoff(rrp));
 		rkp = &key;
 	}
 	if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) {
@@ -1470,7 +1472,8 @@ xfs_bmbt_rshift(
 		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		*rrp = *lrp;
 		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
-		INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
+		INT_SET(key.br_startoff, ARCH_CONVERT,
+			xfs_bmbt_disk_get_startoff(rrp));
 		rkp = &key;
 	}
 	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
@@ -1639,7 +1642,7 @@ xfs_bmbt_split(
 		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
 		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
-		keyp->br_startoff = xfs_bmbt_get_startoff(rrp);
+		keyp->br_startoff = xfs_bmbt_disk_get_startoff(rrp);
 	}
 	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
 	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
@@ -1874,17 +1877,16 @@ xfs_bmbt_delete(
  * This code must be in sync with the routines xfs_bmbt_get_startoff,
  * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
  */
-void
-xfs_bmbt_get_all(
-	xfs_bmbt_rec_t	*r,
-	xfs_bmbt_irec_t *s)
+
+static __inline__ void
+__xfs_bmbt_get_all(
+		__uint64_t l0,
+		__uint64_t l1,
+		xfs_bmbt_irec_t *s)
 {
 	int	ext_flag;
 	xfs_exntst_t st;
-	__uint64_t	l0, l1;
 
-	l0 = INT_GET(r->l0, ARCH_CONVERT);
-	l1 = INT_GET(r->l1, ARCH_CONVERT);
 	ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
 	s->br_startoff = ((xfs_fileoff_t)l0 &
 			   XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
@@ -1898,6 +1900,8 @@ xfs_bmbt_get_all(
 
 		b = (((xfs_dfsbno_t)l0 & XFS_MASK64LO(9)) << 43) |
 		    (((xfs_dfsbno_t)l1) >> 21);
+		if (!((b >> 32) == 0 || ISNULLDSTARTBLOCK(b)))
+			printk("b == 0x%llx NULL %d\n", b, ISNULLDSTARTBLOCK(b));
 		ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
 		s->br_startblock = (xfs_fsblock_t)b;
 	}
@@ -1915,6 +1919,14 @@ xfs_bmbt_get_all(
 	s->br_state = st;
 }
 
+void
+xfs_bmbt_get_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t *s)
+{
+	__xfs_bmbt_get_all(r->l0, r->l1, s);
+}
+
 /*
  * Get the block pointer for the given level of the cursor.
  * Fill in the buffer pointer, if applicable.
@@ -1941,22 +1953,93 @@ xfs_bmbt_get_block(
 }
 
 /*
- * Extract the blockcount field from a bmap extent record.
+ * Extract the blockcount field from an in memory bmap extent record.
  */
 xfs_filblks_t
 xfs_bmbt_get_blockcount(
 	xfs_bmbt_rec_t	*r)
 {
-	return (xfs_filblks_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK64LO(21));
+	return (xfs_filblks_t)(r->l1 & XFS_MASK64LO(21));
 }
 
 /*
- * Extract the startblock field from a bmap extent record.
+ * Extract the startblock field from an in memory bmap extent record.
  */
 xfs_fsblock_t
 xfs_bmbt_get_startblock(
 	xfs_bmbt_rec_t	*r)
 {
+#if XFS_BIG_FILESYSTEMS
+	return (((xfs_fsblock_t)r->l0 & XFS_MASK64LO(9)) << 43) |
+	       (((xfs_fsblock_t)r->l1) >> 21);
+#else
+#ifdef DEBUG
+	xfs_dfsbno_t	b;
+
+	b = (((xfs_dfsbno_t)r->l0 & XFS_MASK64LO(9)) << 43) |
+	    (((xfs_dfsbno_t)r->l1) >> 21);
+	ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+	return (xfs_fsblock_t)b;
+#else	/* !DEBUG */
+	return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21);
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILESYSTEMS */
+}
+
+/*
+ * Extract the startoff field from an in memory bmap extent record.
+ */
+xfs_fileoff_t
+xfs_bmbt_get_startoff(
+	xfs_bmbt_rec_t	*r)
+{
+	return ((xfs_fileoff_t)r->l0 &
+		 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+}
+
+xfs_exntst_t
+xfs_bmbt_get_state(
+	xfs_bmbt_rec_t	*r)
+{
+	int	ext_flag;
+
+	ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN));
+	return xfs_extent_state(xfs_bmbt_get_blockcount(r),
+				ext_flag);
+}
+
+#if ARCH_CONVERT != ARCH_NOCONVERT
+/* Endian flipping versions of the bmbt extraction functions */
+void
+xfs_bmbt_disk_get_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t *s)
+{
+	__uint64_t	l0, l1;
+
+	l0 = INT_GET(r->l0, ARCH_CONVERT);
+	l1 = INT_GET(r->l1, ARCH_CONVERT);
+
+	__xfs_bmbt_get_all(l0, l1, s);
+}
+
+/*
+ * Extract the blockcount field from an on disk bmap extent record.
+ */
+xfs_filblks_t
+xfs_bmbt_disk_get_blockcount(
+	xfs_bmbt_rec_t	*r)
+{
+	return (xfs_filblks_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK64LO(21));
+}
+
+/*
+ * Extract the startblock field from an on disk bmap extent record.
+ */
+xfs_fsblock_t
+xfs_bmbt_disk_get_startblock(
+	xfs_bmbt_rec_t	*r)
+{
 #if XFS_BIG_FILESYSTEMS
 	return (((xfs_fsblock_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
 	       (((xfs_fsblock_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
@@ -1975,10 +2058,10 @@ xfs_bmbt_get_startblock(
 }
 
 /*
- * Extract the startoff field from a bmap extent record.
+ * Extract the startoff field from a disk format bmap extent record.
  */
 xfs_fileoff_t
-xfs_bmbt_get_startoff(
+xfs_bmbt_disk_get_startoff(
 	xfs_bmbt_rec_t	*r)
 {
 	return ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
@@ -1986,15 +2069,16 @@ xfs_bmbt_get_startoff(
 }
 
 xfs_exntst_t
-xfs_bmbt_get_state(
+xfs_bmbt_disk_get_state(
 	xfs_bmbt_rec_t	*r)
 {
 	int	ext_flag;
 
 	ext_flag = (int)((INT_GET(r->l0, ARCH_CONVERT)) >> (64 - BMBT_EXNTFLAG_BITLEN));
-	return xfs_extent_state(xfs_bmbt_get_blockcount(r),
+	return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r),
 				ext_flag);
 }
+#endif
 
 
 /*
@@ -2103,7 +2187,7 @@ xfs_bmbt_insert(
 	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
 	level = 0;
 	nbno = NULLFSBLOCK;
-	xfs_bmbt_set_all(&nrec, &cur->bc_rec.b);
+	xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
 	ncur = (xfs_btree_cur_t *)0;
 	pcur = cur;
 	do {
@@ -2387,6 +2471,97 @@ xfs_bmbt_set_all(
 #if XFS_BIG_FILESYSTEMS
 	ASSERT((s->br_startblock & XFS_MASK64HI(12)) == 0);
 #endif	/* XFS_BIG_FILESYSTEMS */
+#if XFS_BIG_FILESYSTEMS
+	r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+		 ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+		 ((xfs_bmbt_rec_base_t)s->br_startblock >> 43);
+	r->l1 = ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+		 ((xfs_bmbt_rec_base_t)s->br_blockcount &
+		 (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(s->br_startblock)) {
+		r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+			  (xfs_bmbt_rec_base_t)XFS_MASK64LO(9);
+		r->l1 = XFS_MASK64HI(11) |
+			  ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+			  ((xfs_bmbt_rec_base_t)s->br_blockcount &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+	} else {
+		r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)s->br_startoff << 9);
+		r->l1 = ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+			  ((xfs_bmbt_rec_base_t)s->br_blockcount &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+}
+
+/*
+ * Set all the fields in a bmap extent record from the arguments.
+ */
+void
+xfs_bmbt_set_allf(
+	xfs_bmbt_rec_t	*r,	/* in-memory extent record to fill in */
+	xfs_fileoff_t	o,	/* starting file offset */
+	xfs_fsblock_t	b,	/* starting block */
+	xfs_filblks_t	c,	/* block count */
+	xfs_exntst_t	v)	/* extent state (NORM or UNWRITTEN) */
+{
+	int	extent_flag;	/* 0 for XFS_EXT_NORM, 1 otherwise */
+
+	ASSERT((v == XFS_EXT_NORM) || (v == XFS_EXT_UNWRITTEN));
+	extent_flag = (v == XFS_EXT_NORM) ? 0 : 1;
+	ASSERT((o & XFS_MASK64HI(64-BMBT_STARTOFF_BITLEN)) == 0);
+	ASSERT((c & XFS_MASK64HI(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
+#if XFS_BIG_FILESYSTEMS
+	ASSERT((b & XFS_MASK64HI(64-BMBT_STARTBLOCK_BITLEN)) == 0);
+#endif	/* XFS_BIG_FILESYSTEMS */
+#if XFS_BIG_FILESYSTEMS
+	r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+		((xfs_bmbt_rec_base_t)o << 9) |
+		((xfs_bmbt_rec_base_t)b >> 43);
+	r->l1 = ((xfs_bmbt_rec_base_t)b << 21) |
+		((xfs_bmbt_rec_base_t)c &
+		(xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(b)) {	/* null start block: set all high startblock bits */
+		r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)o << 9) |
+			 (xfs_bmbt_rec_base_t)XFS_MASK64LO(9);
+		r->l1 = XFS_MASK64HI(11) |
+			  ((xfs_bmbt_rec_base_t)b << 21) |
+			  ((xfs_bmbt_rec_base_t)c &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+	} else {
+		r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)o << 9);
+		r->l1 = ((xfs_bmbt_rec_base_t)b << 21) |
+			 ((xfs_bmbt_rec_base_t)c &
+			 (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+}
+
+#if ARCH_CONVERT != ARCH_NOCONVERT
+/*
+ * Set all the fields in a bmap extent record from the uncompressed form.
+ */
+void
+xfs_bmbt_disk_set_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t *s)
+{
+	int	extent_flag;
+
+	ASSERT((s->br_state == XFS_EXT_NORM) ||
+		(s->br_state == XFS_EXT_UNWRITTEN));
+	extent_flag = (s->br_state == XFS_EXT_NORM) ? 0 : 1;
+	ASSERT((s->br_startoff & XFS_MASK64HI(9)) == 0);
+	ASSERT((s->br_blockcount & XFS_MASK64HI(43)) == 0);
+#if XFS_BIG_FILESYSTEMS
+	ASSERT((s->br_startblock & XFS_MASK64HI(12)) == 0);
+#endif	/* XFS_BIG_FILESYSTEMS */
 #if XFS_BIG_FILESYSTEMS
 	INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
 		  ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
@@ -2414,10 +2589,10 @@ xfs_bmbt_set_all(
 }
 
 /*
- * Set all the fields in a bmap extent record from the arguments.
+ * Set all the fields in a disk format bmap extent record from the arguments.
  */
 void
-xfs_bmbt_set_allf(
+xfs_bmbt_disk_set_allf(
 	xfs_bmbt_rec_t	*r,
 	xfs_fileoff_t	o,
 	xfs_fsblock_t	b,
@@ -2458,6 +2633,7 @@ xfs_bmbt_set_allf(
 	}
 #endif	/* XFS_BIG_FILESYSTEMS */
 }
+#endif
 
 /*
  * Set the blockcount field in a bmap extent record.
@@ -2468,8 +2644,8 @@ xfs_bmbt_set_blockcount(
 	xfs_filblks_t	v)
 {
 	ASSERT((v & XFS_MASK64HI(43)) == 0);
-	INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64HI(43)) |
-		  (xfs_bmbt_rec_base_t)(v & XFS_MASK64LO(21)));
+	r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64HI(43)) |
+		  (xfs_bmbt_rec_base_t)(v & XFS_MASK64LO(21));
 }
 
 /*
@@ -2484,20 +2660,20 @@ xfs_bmbt_set_startblock(
 	ASSERT((v & XFS_MASK64HI(12)) == 0);
 #endif	/* XFS_BIG_FILESYSTEMS */
 #if XFS_BIG_FILESYSTEMS
-	INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64HI(55)) |
-		  (xfs_bmbt_rec_base_t)(v >> 43));
-	INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)) |
-		  (xfs_bmbt_rec_base_t)(v << 21));
+	r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)XFS_MASK64HI(55)) |
+		  (xfs_bmbt_rec_base_t)(v >> 43);
+	r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)) |
+		  (xfs_bmbt_rec_base_t)(v << 21);
 #else	/* !XFS_BIG_FILESYSTEMS */
 	if (ISNULLSTARTBLOCK(v)) {
-		INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) | (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
-		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK64HI(11) |
+		r->l0 |= (xfs_bmbt_rec_base_t)XFS_MASK64LO(9);
+		r->l1 = (xfs_bmbt_rec_base_t)XFS_MASK64HI(11) |
 			  ((xfs_bmbt_rec_base_t)v << 21) |
-			  (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+			  (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
 	} else {
-		INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & ~(xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
-		INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)v << 21) |
-			  (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+		r->l0 &= ~(xfs_bmbt_rec_base_t)XFS_MASK64LO(9);
+		r->l1 = ((xfs_bmbt_rec_base_t)v << 21) |
+			  (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
 	}
 #endif	/* XFS_BIG_FILESYSTEMS */
 }
@@ -2511,9 +2687,9 @@ xfs_bmbt_set_startoff(
 	xfs_fileoff_t	v)
 {
 	ASSERT((v & XFS_MASK64HI(9)) == 0);
-	INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t) XFS_MASK64HI(1)) |
+	r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) XFS_MASK64HI(1)) |
 		((xfs_bmbt_rec_base_t)v << 9) |
-		  (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
+		  (r->l0 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(9));
 }
 
 /*
@@ -2526,9 +2702,9 @@ xfs_bmbt_set_state(
 {
 	ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
 	if (v == XFS_EXT_NORM)
-		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN));
+		r->l0 &= XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN);
 	else
-		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) | XFS_MASK64HI(BMBT_EXNTFLAG_BITLEN));
+		r->l0 |= XFS_MASK64HI(BMBT_EXNTFLAG_BITLEN);
 }
 
 /*
@@ -2596,7 +2772,7 @@ xfs_bmbt_update(
 #endif
 	ptr = cur->bc_ptrs[0];
 	rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
-	xfs_bmbt_set_allf(rp, off, bno, len, state);
+	xfs_bmbt_disk_set_allf(rp, off, bno, len, state);
 	xfs_bmbt_log_recs(cur, bp, ptr, ptr);
 	if (ptr > 1) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
@@ -2618,14 +2794,17 @@ xfs_bmbt_update(
  * Return an error condition (1) if any flags found,
  * otherwise return 0.
  */
+
 int
 xfs_check_nostate_extents(
 	xfs_bmbt_rec_t		*ep,
 	xfs_extnum_t		num)
 {
 	for (; num > 0; num--, ep++) {
-		if (((INT_GET(ep->l0, ARCH_CONVERT)) >>
+		if ((ep->l0 >>
 		     (64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
+			printk("Extent at 0x%p value 0x%llx\n", ep,
+				(ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN)));
 			ASSERT(0);
 			return 1;
 		}
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index a9ec9c58252d..8aeefd43c967 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -509,6 +509,41 @@ xfs_exntst_t
 xfs_bmbt_get_state(
 	xfs_bmbt_rec_t	*r);
 
+#if ARCH_CONVERT != ARCH_NOCONVERT
+void
+xfs_bmbt_disk_get_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t *s);
+
+xfs_exntst_t
+xfs_bmbt_disk_get_state(
+	xfs_bmbt_rec_t	*r);
+
+xfs_filblks_t
+xfs_bmbt_disk_get_blockcount(
+	xfs_bmbt_rec_t	*r);
+
+xfs_fsblock_t
+xfs_bmbt_disk_get_startblock(
+	xfs_bmbt_rec_t	*r);
+
+xfs_fileoff_t
+xfs_bmbt_disk_get_startoff(
+	xfs_bmbt_rec_t	*r);
+
+#else
+#define xfs_bmbt_disk_get_all(r, s) \
+	xfs_bmbt_get_all(r, s)
+#define xfs_bmbt_disk_get_state(r) \
+	xfs_bmbt_get_state(r)
+#define xfs_bmbt_disk_get_blockcount(r) \
+	xfs_bmbt_get_blockcount(r)
+#define xfs_bmbt_disk_get_startblock(r) \
+	xfs_bmbt_get_startblock(r)	/* ARCH_NOCONVERT: alias the in-memory accessor */
+#define xfs_bmbt_disk_get_startoff(r) \
+	xfs_bmbt_get_startoff(r)
+#endif
+
 int
 xfs_bmbt_increment(
 	struct xfs_btree_cur *,
@@ -607,6 +642,26 @@ xfs_bmbt_set_state(
 	xfs_bmbt_rec_t	*r,
 	xfs_exntst_t	v);
 
+#if ARCH_CONVERT != ARCH_NOCONVERT
+void
+xfs_bmbt_disk_set_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t *s);
+
+void
+xfs_bmbt_disk_set_allf(
+	xfs_bmbt_rec_t	*r,
+	xfs_fileoff_t	o,
+	xfs_fsblock_t	b,
+	xfs_filblks_t	c,
+	xfs_exntst_t	v);
+#else
+#define xfs_bmbt_disk_set_all(r, s) \
+	xfs_bmbt_set_all(r, s)
+#define xfs_bmbt_disk_set_allf(r, o, b, c, v) \
+	xfs_bmbt_set_allf(r, o, b, c, v)	/* ARCH_NOCONVERT: alias the in-memory setter */
+#endif
+
 void
 xfs_bmbt_to_bmdr(
 	xfs_bmbt_block_t *,
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 7dcef68fb253..115b05df35ba 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -261,9 +261,9 @@ xfs_btree_check_rec(
 
 		r1 = ar1;
 		r2 = ar2;
-		ASSERT(xfs_bmbt_get_startoff(r1) +
-		       xfs_bmbt_get_blockcount(r1) <=
-		       xfs_bmbt_get_startoff(r2));
+		ASSERT(xfs_bmbt_disk_get_startoff(r1) +
+		       xfs_bmbt_disk_get_blockcount(r1) <=
+		       xfs_bmbt_disk_get_startoff(r2));
 		break;
 	    }
 	case XFS_BTNUM_INO: {
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5f14333dbaff..0ee8eee14756 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -56,8 +56,9 @@ STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
  */
 STATIC void
 xfs_validate_extents(
-	xfs_bmbt_rec_32_t	*ep,
+	xfs_bmbt_rec_t		*ep,
 	int			nrecs,
+	int			disk,
 	xfs_exntfmt_t		fmt)
 {
 	xfs_bmbt_irec_t		irec;
@@ -66,14 +67,17 @@ xfs_validate_extents(
 
 	for (i = 0; i < nrecs; i++) {
 		memcpy(&rec, ep, sizeof(rec));
-		xfs_bmbt_get_all(&rec, &irec);
+		if (disk)
+			xfs_bmbt_disk_get_all(&rec, &irec);
+		else
+			xfs_bmbt_get_all(&rec, &irec);
 		if (fmt == XFS_EXTFMT_NOSTATE)
 			ASSERT(irec.br_state == XFS_EXT_NORM);
 		ep++;
 	}
 }
 #else /* DEBUG */
-#define xfs_validate_extents(ep, nrecs, fmt)
+#define xfs_validate_extents(ep, nrecs, disk, fmt)
 #endif /* DEBUG */
 
 /*
@@ -598,9 +602,10 @@ xfs_iformat_extents(
 	int		whichfork)
 {
 	xfs_ifork_t	*ifp;
-	int		nex;
+	int		nex, i;
 	int		real_size;
 	int		size;
+	xfs_bmbt_rec_t	*ep, *dp;
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	nex = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
@@ -633,10 +638,18 @@ xfs_iformat_extents(
 	ifp->if_real_bytes = real_size;
 	if (size) {
 		xfs_validate_extents(
-			(xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT),
-			nex, XFS_EXTFMT_INODE(ip));
-		memcpy(ifp->if_u1.if_extents,
-			XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), size);
+			(xfs_bmbt_rec_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT),
+			nex, 1, XFS_EXTFMT_INODE(ip));
+		dp = (xfs_bmbt_rec_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+		ep = ifp->if_u1.if_extents;
+#if ARCH_CONVERT != ARCH_NOCONVERT
+		for (i = 0; i < nex; i++, ep++, dp++) {
+			ep->l0 = INT_GET(dp->l0, ARCH_CONVERT);
+			ep->l1 = INT_GET(dp->l1, ARCH_CONVERT);
+		}
+#else
+		memcpy(ep, dp, size);
+#endif
 		xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex,
 			whichfork);
 		if (whichfork != XFS_DATA_FORK ||
@@ -979,8 +992,8 @@ xfs_iread_extents(
 		ifp->if_flags &= ~XFS_IFEXTENTS;
 		return error;
 	}
-	xfs_validate_extents((xfs_bmbt_rec_32_t *)ifp->if_u1.if_extents,
-		XFS_IFORK_NEXTENTS(ip, whichfork), XFS_EXTFMT_INODE(ip));
+	xfs_validate_extents((xfs_bmbt_rec_t *)ifp->if_u1.if_extents,
+		XFS_IFORK_NEXTENTS(ip, whichfork), 0, XFS_EXTFMT_INODE(ip));
 	return 0;
 }
 
@@ -2617,11 +2630,11 @@ xfs_iunpin_wait(
 int
 xfs_iextents_copy(
 	xfs_inode_t		*ip,
-	xfs_bmbt_rec_32_t	*buffer,
+	xfs_bmbt_rec_t		*buffer,
 	int			whichfork)
 {
 	int			copied;
-	xfs_bmbt_rec_32_t	*dest_ep;
+	xfs_bmbt_rec_t		*dest_ep;
 	xfs_bmbt_rec_t		*ep;
 #ifdef XFS_BMAP_TRACE
 	static char		fname[] = "xfs_iextents_copy";
@@ -2638,28 +2651,13 @@ xfs_iextents_copy(
 	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 	xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork);
 	ASSERT(nrecs > 0);
-	if (nrecs == XFS_IFORK_NEXTENTS(ip, whichfork)) {
-		/*
-		 * There are no delayed allocation extents,
-		 * so just copy everything.
-		 */
-		ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
-		ASSERT(ifp->if_bytes ==
-		       (XFS_IFORK_NEXTENTS(ip, whichfork) *
-			(uint)sizeof(xfs_bmbt_rec_t)));
-		memcpy(buffer, ifp->if_u1.if_extents, ifp->if_bytes);
-		xfs_validate_extents(buffer, nrecs, XFS_EXTFMT_INODE(ip));
-		return ifp->if_bytes;
-	}
 
-	ASSERT(whichfork == XFS_DATA_FORK);
 	/*
 	 * There are some delayed allocation extents in the
 	 * inode, so copy the extents one at a time and skip
 	 * the delayed ones.  There must be at least one
 	 * non-delayed extent.
 	 */
-	ASSERT(nrecs > ip->i_d.di_nextents);
 	ep = ifp->if_u1.if_extents;
 	dest_ep = buffer;
 	copied = 0;
@@ -2673,15 +2671,19 @@ xfs_iextents_copy(
 			continue;
 		}
 
-		*dest_ep = *(xfs_bmbt_rec_32_t *)ep;
+#if ARCH_CONVERT != ARCH_NOCONVERT
+		/* Translate to on disk format */
+		dest_ep->l0 = INT_GET(ep->l0, ARCH_CONVERT);
+		dest_ep->l1 = INT_GET(ep->l1, ARCH_CONVERT);
+#else
+		*dest_ep = *ep;
+#endif
 		dest_ep++;
 		ep++;
 		copied++;
 	}
 	ASSERT(copied != 0);
-	ASSERT(copied == ip->i_d.di_nextents);
-	ASSERT((copied * (uint)sizeof(xfs_bmbt_rec_t)) <= XFS_IFORK_DSIZE(ip));
-	xfs_validate_extents(buffer, copied, XFS_EXTFMT_INODE(ip));
+	xfs_validate_extents(buffer, copied, 1, XFS_EXTFMT_INODE(ip));
 
 	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
 }
@@ -2754,7 +2756,7 @@ xfs_iflush_fork(
 		if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
 		    (ifp->if_bytes > 0)) {
 			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
-			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_32_t *)cp,
+			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
 				whichfork);
 		}
 		break;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index ea739492d91e..38669ac946f0 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -520,7 +520,7 @@ void		xfs_iext_realloc(xfs_inode_t *, int, int);
 void		xfs_iroot_realloc(xfs_inode_t *, int, int);
 void		xfs_ipin(xfs_inode_t *);
 void		xfs_iunpin(xfs_inode_t *);
-int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_32_t *, int);
+int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
 int		xfs_iflush(xfs_inode_t *, uint);
 int		xfs_iflush_all(struct xfs_mount *, int);
 int		xfs_ibusy_check(xfs_inode_t *, int);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 234ef996a8f5..d23d596d7973 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -214,7 +214,7 @@ xfs_inode_item_format(
 	xfs_log_iovec_t		*vecp;
 	xfs_inode_t		*ip;
 	size_t			data_bytes;
-	xfs_bmbt_rec_32_t	*ext_buffer;
+	xfs_bmbt_rec_t		*ext_buffer;
 	int			nrecs;
 	xfs_mount_t		*mp;
 
@@ -314,6 +314,7 @@ xfs_inode_item_format(
 			nrecs = ip->i_df.if_bytes /
 				(uint)sizeof(xfs_bmbt_rec_t);
 			ASSERT(nrecs > 0);
+#if ARCH_CONVERT == ARCH_NOCONVERT
 			if (nrecs == ip->i_d.di_nextents) {
 				/*
 				 * There are no delayed allocation
@@ -323,10 +324,14 @@ xfs_inode_item_format(
 				vecp->i_addr =
 					(char *)(ip->i_df.if_u1.if_extents);
 				vecp->i_len = ip->i_df.if_bytes;
-			} else {
+			} else 
+#endif
+			{
 				/*
 				 * There are delayed allocation extents
-				 * in the inode.  Use xfs_iextents_copy()
+				 * in the inode, or we need to convert
+				 * the extents to on disk format.
+				 * Use xfs_iextents_copy()
 				 * to copy only the real extents into
 				 * a separate buffer.  We'll free the
 				 * buffer in the unlock routine.
@@ -336,7 +341,7 @@ xfs_inode_item_format(
 				iip->ili_extents_buf = ext_buffer;
 				vecp->i_addr = (xfs_caddr_t)ext_buffer;
 				vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
-					XFS_DATA_FORK);
+						XFS_DATA_FORK);
 			}
 			ASSERT(vecp->i_len <= ip->i_df.if_bytes);
 			iip->ili_format.ilf_dsize = vecp->i_len;
@@ -428,6 +433,7 @@ xfs_inode_item_format(
 		ASSERT(!(iip->ili_format.ilf_fields &
 			 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
 		if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
+			ASSERT(!(iip->ili_format.ilf_fields & XFS_ILOG_DEXT));
 			ASSERT(ip->i_afp->if_bytes > 0);
 			ASSERT(ip->i_afp->if_u1.if_extents != NULL);
 			ASSERT(ip->i_d.di_anextents > 0);
@@ -437,12 +443,25 @@ xfs_inode_item_format(
 #endif
 			ASSERT(nrecs > 0);
 			ASSERT(nrecs == ip->i_d.di_anextents);
+#if ARCH_CONVERT == ARCH_NOCONVERT
 			/*
 			 * There are not delayed allocation extents
 			 * for attributes, so just point at the array.
 			 */
 			vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents);
 			vecp->i_len = ip->i_afp->if_bytes;
+#else		
+			ASSERT(iip->ili_aextents_buf == NULL);
+			/*
+			 * Need to endian flip before logging
+			 */
+			ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
+				KM_SLEEP);
+			iip->ili_aextents_buf = ext_buffer;
+			vecp->i_addr = (xfs_caddr_t)ext_buffer;
+			vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
+					XFS_ATTR_FORK);
+#endif
 			iip->ili_format.ilf_asize = vecp->i_len;
 			vecp++;
 			nvecs++;
@@ -630,7 +649,6 @@ xfs_inode_item_unlock(
 	 * If the inode needed a separate buffer with which to log
 	 * its extents, then free it now.
 	 */
-	/* FIXME */
 	if (iip->ili_extents_buf != NULL) {
 		ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
 		ASSERT(ip->i_d.di_nextents > 0);
@@ -639,6 +657,14 @@ xfs_inode_item_unlock(
 		kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes);
 		iip->ili_extents_buf = NULL;
 	}
+	if (iip->ili_aextents_buf != NULL) {
+		ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
+		ASSERT(ip->i_d.di_anextents > 0);
+		ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
+		ASSERT(ip->i_afp->if_bytes > 0);
+		kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes);
+		iip->ili_aextents_buf = NULL;
+	}
 
 	/*
 	 * Figure out if we should unlock the inode or not.
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index d90407088842..4970205a5e69 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -141,7 +141,10 @@ typedef struct xfs_inode_log_item {
 	unsigned short		ili_flags;	   /* misc flags */
 	unsigned short		ili_logged;	   /* flushed logged data */
 	unsigned int		ili_last_fields;   /* fields when flushed */
-	struct xfs_bmbt_rec_32	*ili_extents_buf;  /* array of logged exts */
+	struct xfs_bmbt_rec_32	*ili_extents_buf;  /* array of logged
+						      data exts */
+	struct xfs_bmbt_rec_32	*ili_aextents_buf; /* array of logged
+						      attr exts */
 	unsigned int		ili_pushbuf_flag;  /* one bit used in push_ail */
 
 #ifdef DEBUG
diff --git a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
index 5c70b8a77c7e..b762d3812954 100644
--- a/fs/xfs/xfsidbg.c
+++ b/fs/xfs/xfsidbg.c
@@ -2364,7 +2364,7 @@ xfs_btbmap(xfs_bmbt_block_t *bt, int bsz)
 			r = (xfs_bmbt_rec_t *)XFS_BTREE_REC_ADDR(bsz,
 				xfs_bmbt, bt, i, 0);
 
-			xfs_bmbt_get_all((xfs_bmbt_rec_t *)r, &irec);
+			xfs_bmbt_disk_get_all((xfs_bmbt_rec_t *)r, &irec);
 			kdb_printf("rec %d startoff %Ld startblock %Lx blockcount %Ld flag %d\n",
 				i, irec.br_startoff,
 				(__uint64_t)irec.br_startblock, 
-- 
cgit v1.2.3


From 6c005389fa4baf79049b75db094d63c0148893ed Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 03:17:02 +0200
Subject: XFS: remove debug print statements

Modid: 2.5.x-xfs:slinx:129722a
---
 fs/xfs/xfs_bmap_btree.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index f13181dd97c8..07d513d24f74 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1900,8 +1900,6 @@ __xfs_bmbt_get_all(
 
 		b = (((xfs_dfsbno_t)l0 & XFS_MASK64LO(9)) << 43) |
 		    (((xfs_dfsbno_t)l1) >> 21);
-		if (!((b >> 32) == 0 || ISNULLDSTARTBLOCK(b)))
-			printk("b == 0x%llx NULL %d\n", b, ISNULLDSTARTBLOCK(b));
 		ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
 		s->br_startblock = (xfs_fsblock_t)b;
 	}
@@ -2803,8 +2801,6 @@ xfs_check_nostate_extents(
 	for (; num > 0; num--, ep++) {
 		if ((ep->l0 >>
 		     (64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
-			printk("Extent at 0x%p value 0x%llx\n", ep,
-				(ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN)));
 			ASSERT(0);
 			return 1;
 		}
-- 
cgit v1.2.3


From c05feefbf29dafc4a415e18aaa527e05e6dca777 Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 03:18:15 +0200
Subject: XFS: merge strategy and bmap calls, they are two aspects of the same
 operation

Modid: 2.5.x-xfs:slinx:129728a
---
 fs/xfs/linux/xfs_aops.c  |  14 +--
 fs/xfs/linux/xfs_lrw.c   | 227 +++++++++++++++++++++++------------------------
 fs/xfs/linux/xfs_lrw.h   |   3 +-
 fs/xfs/linux/xfs_vnode.h |  14 +--
 fs/xfs/xfs_vnodeops.c    |   1 -
 5 files changed, 119 insertions(+), 140 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c
index 2fd598c79e7f..8364f6c3eb41 100644
--- a/fs/xfs/linux/xfs_aops.c
+++ b/fs/xfs/linux/xfs_aops.c
@@ -49,17 +49,11 @@ map_blocks(
 	int			error, nmaps = 1;
 
 retry:
-	if (flags & PBF_FILE_ALLOCATE) {
-		VOP_STRATEGY(vp, offset, count, flags, NULL,
-				pbmapp, &nmaps, error);
-	} else {
-		VOP_BMAP(vp, offset, count, flags, NULL,
-				pbmapp, &nmaps, error);
-	}
+	VOP_BMAP(vp, offset, count, flags, pbmapp, &nmaps, error);
 	if (flags & PBF_WRITE) {
 		if (unlikely((flags & PBF_DIRECT) && nmaps &&
 		    (pbmapp->pbm_flags & PBMF_DELAY))) {
-			flags = PBF_WRITE | PBF_FILE_ALLOCATE;
+			flags = PBF_FILE_ALLOCATE;
 			goto retry;
 		}
 		VMODIFY(vp);
@@ -404,7 +398,7 @@ delalloc_convert(
 		if (buffer_delay(bh)) {
 			if (!mp) {
 				err = map_blocks(inode, offset, len, &map,
-						PBF_WRITE|PBF_FILE_ALLOCATE);
+						PBF_FILE_ALLOCATE);
 				if (err) {
 					goto error;
 				}
@@ -517,7 +511,7 @@ linvfs_get_block_core(
 	}
 
 	VOP_BMAP(vp, offset, size,
-		create ? flags : PBF_READ, NULL,
+		create ? flags : PBF_READ,
 		(struct page_buf_bmap_s *)&pbmap, &retpbbm, error);
 	if (error)
 		return -error;
diff --git a/fs/xfs/linux/xfs_lrw.c b/fs/xfs/linux/xfs_lrw.c
index 05a4b4e84b4c..9f4a37c46f32 100644
--- a/fs/xfs/linux/xfs_lrw.c
+++ b/fs/xfs/linux/xfs_lrw.c
@@ -43,16 +43,16 @@
 						<< mp->m_writeio_log)
 #define XFS_STRAT_WRITE_IMAPS	2
 
-STATIC int xfs_iomap_read(xfs_iocore_t *, loff_t, size_t, int, pb_bmap_t *,
+STATIC int xfs_iomap_read(xfs_iocore_t *, loff_t, size_t, int, page_buf_bmap_t *,
 			int *);
-STATIC int xfs_iomap_write(xfs_iocore_t *, loff_t, size_t, pb_bmap_t *,
+STATIC int xfs_iomap_write(xfs_iocore_t *, loff_t, size_t, page_buf_bmap_t *,
 			int *, int);
-STATIC int xfs_iomap_write_delay(xfs_iocore_t *, loff_t, size_t, pb_bmap_t *,
+STATIC int xfs_iomap_write_delay(xfs_iocore_t *, loff_t, size_t, page_buf_bmap_t *,
 			int *, int, int);
-STATIC int xfs_iomap_write_direct(xfs_iocore_t *, loff_t, size_t, pb_bmap_t *,
+STATIC int xfs_iomap_write_direct(xfs_iocore_t *, loff_t, size_t, page_buf_bmap_t *,
 			int *, int, int);
 STATIC int _xfs_imap_to_bmap(xfs_iocore_t *, xfs_off_t, xfs_bmbt_irec_t *,
-			pb_bmap_t *, int, int);
+			page_buf_bmap_t *, int, int);
 
 
 /*
@@ -757,112 +757,15 @@ retry:
 	return(ret);
 }
 
-/*
- * xfs_bmap() is the same as the irix xfs_bmap from xfs_rw.c
- * execpt for slight changes to the params
- */
-int
-xfs_bmap(bhv_desc_t	*bdp,
-	xfs_off_t	offset,
-	ssize_t		count,
-	int		flags,
-	struct cred	*cred,
-	pb_bmap_t	*pbmapp,
-	int		*npbmaps)
-{
-	xfs_inode_t	*ip;
-	int		error;
-	int		lockmode;
-	int		fsynced = 0;
-	vnode_t		*vp;
-
-	ip = XFS_BHVTOI(bdp);
-	ASSERT((ip->i_d.di_mode & IFMT) == IFREG);
-	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
-	       ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
-	ASSERT((flags & PBF_READ) || (flags & PBF_WRITE));
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_iocore.io_mount))
-		return XFS_ERROR(EIO);
-
-	if (flags & PBF_READ) {
-		lockmode = xfs_ilock_map_shared(ip);
-		error = xfs_iomap_read(&ip->i_iocore, offset, count,
-				 XFS_BMAPI_ENTIRE, pbmapp, npbmaps);
-		xfs_iunlock_map_shared(ip, lockmode);
-	} else { /* PBF_WRITE */
-		ASSERT(flags & PBF_WRITE);
-		vp = BHV_TO_VNODE(bdp);
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-		/*
-		 * Make sure that the dquots are there. This doesn't hold
-		 * the ilock across a disk read.
-		 */
-
-		if (XFS_IS_QUOTA_ON(ip->i_mount)) {
-			if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
-				if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_ILOCKED))) {
-					xfs_iunlock(ip, XFS_ILOCK_EXCL);
-					return XFS_ERROR(error);
-				}
-			}
-		}
-retry:
-		error = xfs_iomap_write(&ip->i_iocore, offset, count,
-					pbmapp, npbmaps, flags);
-		/* xfs_iomap_write unlocks/locks/unlocks */
-
-		if (error == ENOSPC) {
-			switch (fsynced) {
-			case 0:
-				if (ip->i_delayed_blks) {
-					filemap_fdatawrite(LINVFS_GET_IP(vp)->i_mapping);
-					fsynced = 1;
-				} else {
-					fsynced = 2;
-					flags |= PBF_SYNC;
-				}
-				error = 0;
-				xfs_ilock(ip, XFS_ILOCK_EXCL);
-				goto retry;
-			case 1:
-				fsynced = 2;
-				if (!(flags & PBF_SYNC)) {
-					flags |= PBF_SYNC;
-					error = 0;
-					xfs_ilock(ip, XFS_ILOCK_EXCL);
-					goto retry;
-				}
-			case 2:
-				sync_blockdev(vp->v_vfsp->vfs_super->s_bdev);
-				xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
-						XFS_LOG_FORCE|XFS_LOG_SYNC);
-
-				error = 0;
-/**
-				delay(HZ);
-**/
-				fsynced++;
-				xfs_ilock(ip, XFS_ILOCK_EXCL);
-				goto retry;
-			}
-		}
-	}
-
-	return XFS_ERROR(error);
-}
 
 int
-xfs_strategy(bhv_desc_t *bdp,
+xfs_strategy(xfs_inode_t *ip,
 	xfs_off_t	offset,
 	ssize_t		count,
 	int		flags,
-	struct cred	*cred,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		*npbmaps)
 {
-	xfs_inode_t	*ip;
 	xfs_iocore_t	*io;
 	xfs_mount_t	*mp;
 	int		error;
@@ -878,22 +781,16 @@ xfs_strategy(bhv_desc_t *bdp,
 	xfs_bmbt_irec_t imap[XFS_MAX_RW_NBMAPS];
 	xfs_trans_t	*tp;
 
-	ip = XFS_BHVTOI(bdp);
-	vn_trace_entry(BHV_TO_VNODE(bdp), "xfs_strategy",
-					(inst_t *)__return_address);
 	io = &ip->i_iocore;
 	mp = ip->i_mount;
 	/* is_xfs = IO_IS_XFS(io); */
 	ASSERT((ip->i_d.di_mode & IFMT) == IFREG);
 	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
 	       ((io->io_flags & XFS_IOCORE_RT) != 0));
-	ASSERT((flags & PBF_READ) || (flags & PBF_WRITE));
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	ASSERT(flags & PBF_WRITE);
-
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	nimaps = min(XFS_MAX_RW_NBMAPS, *npbmaps);
 	end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
@@ -1086,12 +983,110 @@ xfs_strategy(bhv_desc_t *bdp,
 }
 
 
+/*
+ * xfs_bmap() is the same as the irix xfs_bmap from xfs_rw.c
+ * except for slight changes to the params
+ */
+int
+xfs_bmap(bhv_desc_t	*bdp,
+	xfs_off_t	offset,
+	ssize_t		count,
+	int		flags,
+	page_buf_bmap_t	*pbmapp,
+	int		*npbmaps)
+{
+	xfs_inode_t	*ip;
+	int		error;
+	int		lockmode;
+	int		fsynced = 0;
+	vnode_t		*vp;
+
+	ip = XFS_BHVTOI(bdp);
+	ASSERT((ip->i_d.di_mode & IFMT) == IFREG);
+	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
+	       ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_iocore.io_mount))
+		return XFS_ERROR(EIO);
+
+	if (flags & PBF_READ) {
+		lockmode = xfs_ilock_map_shared(ip);
+		error = xfs_iomap_read(&ip->i_iocore, offset, count,
+				 XFS_BMAPI_ENTIRE, pbmapp, npbmaps);
+		xfs_iunlock_map_shared(ip, lockmode);
+	} else if (flags & PBF_FILE_ALLOCATE) {
+		error = xfs_strategy(ip, offset, count, flags,
+				pbmapp, npbmaps);
+	} else { /* PBF_WRITE */
+		ASSERT(flags & PBF_WRITE);
+		vp = BHV_TO_VNODE(bdp);
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+		/*
+		 * Make sure that the dquots are there. This doesn't hold
+		 * the ilock across a disk read.
+		 */
+
+		if (XFS_IS_QUOTA_ON(ip->i_mount)) {
+			if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
+				if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_ILOCKED))) {
+					xfs_iunlock(ip, XFS_ILOCK_EXCL);
+					return XFS_ERROR(error);
+				}
+			}
+		}
+retry:
+		error = xfs_iomap_write(&ip->i_iocore, offset, count,
+					pbmapp, npbmaps, flags);
+		/* xfs_iomap_write unlocks/locks/unlocks */
+
+		if (error == ENOSPC) {
+			switch (fsynced) {
+			case 0:
+				if (ip->i_delayed_blks) {
+					filemap_fdatawrite(LINVFS_GET_IP(vp)->i_mapping);
+					fsynced = 1;
+				} else {
+					fsynced = 2;
+					flags |= PBF_SYNC;
+				}
+				error = 0;
+				xfs_ilock(ip, XFS_ILOCK_EXCL);
+				goto retry;
+			case 1:
+				fsynced = 2;
+				if (!(flags & PBF_SYNC)) {
+					flags |= PBF_SYNC;
+					error = 0;
+					xfs_ilock(ip, XFS_ILOCK_EXCL);
+					goto retry;
+				}
+			case 2:
+				sync_blockdev(vp->v_vfsp->vfs_super->s_bdev);
+				xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
+						XFS_LOG_FORCE|XFS_LOG_SYNC);
+
+				error = 0;
+/**
+				delay(HZ);
+**/
+				fsynced++;
+				xfs_ilock(ip, XFS_ILOCK_EXCL);
+				goto retry;
+			}
+		}
+	}
+
+	return XFS_ERROR(error);
+}
+
+
 STATIC int
 _xfs_imap_to_bmap(
 	xfs_iocore_t	*io,
 	xfs_off_t	offset,
 	xfs_bmbt_irec_t *imap,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		imaps,			/* Number of imap entries */
 	int		pbmaps)			/* Number of pbmap entries */
 {
@@ -1142,7 +1137,7 @@ xfs_iomap_read(
 	loff_t		offset,
 	size_t		count,
 	int		flags,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		*npbmaps)
 {
 	xfs_fileoff_t	offset_fsb;
@@ -1194,7 +1189,7 @@ xfs_iomap_write(
 	xfs_iocore_t	*io,
 	loff_t		offset,
 	size_t		count,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		*npbmaps,
 	int		ioflag)
 {
@@ -1262,7 +1257,7 @@ xfs_write_bmap(
 	xfs_mount_t	*mp,
 	xfs_iocore_t	*io,
 	xfs_bmbt_irec_t *imapp,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		iosize,
 	xfs_fileoff_t	ioalign,
 	xfs_fsize_t	isize)
@@ -1332,7 +1327,7 @@ xfs_iomap_write_delay(
 	xfs_iocore_t	*io,
 	loff_t		offset,
 	size_t		count,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		*npbmaps,
 	int		ioflag,
 	int		found)
@@ -1530,7 +1525,7 @@ xfs_iomap_write_direct(
 	xfs_iocore_t	*io,
 	loff_t		offset,
 	size_t		count,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		*npbmaps,
 	int		ioflag,
 	int		found)
diff --git a/fs/xfs/linux/xfs_lrw.h b/fs/xfs/linux/xfs_lrw.h
index 4d1967353a44..3ac8eddedb23 100644
--- a/fs/xfs/linux/xfs_lrw.h
+++ b/fs/xfs/linux/xfs_lrw.h
@@ -39,8 +39,7 @@
  */
 #define XFS_MAX_RW_NBMAPS	4
 
-extern int xfs_bmap (bhv_desc_t *, xfs_off_t, ssize_t, int, struct cred *, pb_bmap_t *, int *);
-extern int xfs_strategy (bhv_desc_t *, xfs_off_t, ssize_t, int, struct cred *, pb_bmap_t *, int *);
+extern int xfs_bmap (bhv_desc_t *, xfs_off_t, ssize_t, int, page_buf_bmap_t *, int *);
 extern int xfsbdstrat (struct xfs_mount *, struct xfs_buf *);
 extern int xfs_bdstrat_cb (struct xfs_buf *);
 
diff --git a/fs/xfs/linux/xfs_vnode.h b/fs/xfs/linux/xfs_vnode.h
index 0cc85e88ca5f..97443cb9ffa0 100644
--- a/fs/xfs/linux/xfs_vnode.h
+++ b/fs/xfs/linux/xfs_vnode.h
@@ -211,8 +211,7 @@ typedef int	(*vop_fid2_t)(bhv_desc_t *, struct fid *);
 typedef int	(*vop_release_t)(bhv_desc_t *);
 typedef int	(*vop_rwlock_t)(bhv_desc_t *, vrwlock_t);
 typedef void	(*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t);
-typedef int	(*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, struct cred *, struct page_buf_bmap_s *, int *);
-typedef int	(*vop_strategy_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, struct cred *, struct page_buf_bmap_s *, int *);
+typedef int	(*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, struct page_buf_bmap_s *, int *);
 typedef int	(*vop_reclaim_t)(bhv_desc_t *);
 typedef int	(*vop_attr_get_t)(bhv_desc_t *, char *, char *, int *, int,
 				struct cred *);
@@ -254,7 +253,6 @@ typedef struct vnodeops {
 	vop_rwlock_t		vop_rwlock;
 	vop_rwunlock_t		vop_rwunlock;
 	vop_bmap_t		vop_bmap;
-	vop_strategy_t		vop_strategy;
 	vop_reclaim_t		vop_reclaim;
 	vop_attr_get_t		vop_attr_get;
 	vop_attr_set_t		vop_attr_set;
@@ -286,16 +284,10 @@ typedef struct vnodeops {
 	rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,cr);\
 	VN_BHV_READ_UNLOCK(&(vp)->v_bh);				\
 }
-#define VOP_BMAP(vp,of,sz,rw,cr,b,n,rv)					\
+#define VOP_BMAP(vp,of,sz,rw,b,n,rv)					\
 {									\
 	VN_BHV_READ_LOCK(&(vp)->v_bh);					\
-	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,cr,b,n);		\
-	VN_BHV_READ_UNLOCK(&(vp)->v_bh);				\
-}
-#define VOP_STRATEGY(vp,of,sz,rw,cr,b,n,rv)				\
-{									\
-	VN_BHV_READ_LOCK(&(vp)->v_bh);					\
-	rv = _VOP_(vop_strategy, vp)((vp)->v_fbhv,of,sz,rw,cr,b,n);	\
+	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n);		\
 	VN_BHV_READ_UNLOCK(&(vp)->v_bh);				\
 }
 #define VOP_OPEN(vp, cr, rv)						\
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 9b76c90bb544..07f1af20c108 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -4937,7 +4937,6 @@ vnodeops_t xfs_vnodeops = {
 	.vop_rwlock		= xfs_rwlock,
 	.vop_rwunlock		= xfs_rwunlock,
 	.vop_bmap		= xfs_bmap,
-	.vop_strategy		= xfs_strategy,
 	.vop_reclaim		= xfs_reclaim,
 	.vop_attr_get		= xfs_attr_get,
 	.vop_attr_set		= xfs_attr_set,
-- 
cgit v1.2.3


From 955397194bf1d644ce4092e4bfdfda847411ac7e Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 03:19:34 +0200
Subject: XFS: fix some off by one errors in the busy list search code

The errors were benevolent, we flushed the log more than we needed to.

Modid: 2.5.x-xfs:slinx:129745a
---
 fs/xfs/xfs_alloc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4489ce3a1502..6a617c57be51 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2597,7 +2597,7 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
 	cnt = mp->m_perag[agno].pagb_count;
 
-	uend = bno + len;
+	uend = bno + len - 1;
 
 	/* search pagb_list for this slot, skipping open slots */
 	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
@@ -2607,16 +2607,16 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 		 * (start1,length1) within (start2, length2)
 		 */
 		if (bsy->busy_tp != NULL) {
-			bend = bsy->busy_start + bsy->busy_length;
-			if ( (bno >= bsy->busy_start && bno <= bend) ||
-			     (uend >= bsy->busy_start && uend <= bend) ||
-			     (bno <= bsy->busy_start && uend >= bsy->busy_start) ) {
+			bend = bsy->busy_start + bsy->busy_length - 1;
+			if ((bno > bend) ||
+			    (uend < bsy->busy_start)) {
+				cnt--;
+			} else {
 				TRACE_BUSYSEARCH("xfs_alloc_search_busy",
 						 "found1", agno, bno, len, n,
 						 tp);
 				break;
 			}
-			cnt--;
 		}
 	}
 
-- 
cgit v1.2.3


From 6693befa78b942e4381f2b7addfb27a4434f8230 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 03:23:15 +0200
Subject: XFS: Clean up xfs' log message printing

Modid: 2.5.x-xfs:slinx:129771a
---
 fs/xfs/support/debug.c   | 39 ++++++++++-----------------------------
 fs/xfs/xfs_dir.c         |  8 ++++----
 fs/xfs/xfs_dir_leaf.c    |  2 +-
 fs/xfs/xfs_error.c       |  2 +-
 fs/xfs/xfs_fsops.c       |  4 ++--
 fs/xfs/xfs_iget.c        |  1 -
 fs/xfs/xfs_log.c         |  2 +-
 fs/xfs/xfs_log_recover.c |  6 +++---
 fs/xfs/xfs_qm.c          |  4 ++--
 fs/xfs/xfs_rw.c          |  2 +-
 fs/xfs/xfs_trans_buf.c   |  2 +-
 fs/xfs/xfs_vfsops.c      |  4 ++--
 12 files changed, 28 insertions(+), 48 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index b86f415e2f70..d6fe5d8b6983 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -40,6 +40,11 @@ int			doass = 1;
 static char		message[256];	/* keep it off the stack */
 static spinlock_t 	xfs_err_lock = SPIN_LOCK_UNLOCKED;
 
+/* Translate from CE_FOO to KERN_FOO, err_level[CE_FOO] == KERN_FOO */
+static char		*err_level[8] = {KERN_EMERG, KERN_ALERT, KERN_CRIT,
+					 KERN_ERR, KERN_WARNING, KERN_NOTICE,
+					 KERN_INFO, KERN_DEBUG};
+
 void
 assfail(char *a, char *f, int l)
 {
@@ -71,10 +76,7 @@ get_thread_id(void)
 	return current->pid;
 }
 
-# define xdprintk(format...)	printk(format)
-#else
-# define xdprintk(format...)	do { } while (0)
-#endif
+#endif /* DEBUG */
 
 void
 cmn_err(register int level, char *fmt, ...)
@@ -86,18 +88,7 @@ cmn_err(register int level, char *fmt, ...)
 	va_start(ap, fmt);
 	if (*fmt == '!') fp++;
 	vsprintf(message, fp, ap);
-	switch (level) {
-	case CE_CONT:
-	case CE_WARN:
-		printk("%s\n", message);
-		break;
-	case CE_DEBUG:
-		xdprintk("%s\n", message);
-		break;
-	default:
-		printk("%s\n", message);
-		break;
-	}
+	printk("%s%s\n", err_level[level], message);
 	va_end(ap);
 	spin_unlock(&xfs_err_lock);
 
@@ -111,18 +102,8 @@ icmn_err(register int level, char *fmt, va_list ap)
 {
 	spin_lock(&xfs_err_lock);
 	vsprintf(message, fmt, ap);
-	switch (level) {
-	case CE_CONT:
-	case CE_WARN:
-		printk("%s", message);
-		break;
-	case CE_DEBUG:
-		xdprintk("%s", message);
-		break;
-	default:
-		printk("cmn_err level %d ", level);
-		printk("%s\n", message);
-		break;
-	}
 	spin_unlock(&xfs_err_lock);
+	printk("%s%s\n", err_level[level], message);
+	if (level == CE_PANIC)
+		BUG();
 }
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index 24248a19942d..ad3ecd88f1a6 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -534,7 +534,7 @@ xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp)
 		return 0;
 	}
 	if (INT_GET(dp->di_core.di_size, ARCH_CONVERT) < sizeof(sf->hdr)) {
-		xfs_fs_cmn_err(CE_WARN, mp, "Invalid shortform size: dp 0x%p\n",
+		xfs_fs_cmn_err(CE_WARN, mp, "Invalid shortform size: dp 0x%p",
 			dp);
 		return 1;
 	}
@@ -546,7 +546,7 @@ xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp)
 	count = sf->hdr.count;
 	if ((count < 0) || ((count * 10) > XFS_LITINO(mp))) {
 		xfs_fs_cmn_err(CE_WARN, mp,
-			"Invalid shortform count: dp 0x%p\n", dp);
+			"Invalid shortform count: dp 0x%p", dp);
 		return(1);
 	}
 
@@ -561,7 +561,7 @@ xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp)
 		xfs_dir_ino_validate(mp, ino);
 		if (sfe->namelen >= XFS_LITINO(mp)) {
 			xfs_fs_cmn_err(CE_WARN, mp,
-				"Invalid shortform namelen: dp 0x%p\n", dp);
+				"Invalid shortform namelen: dp 0x%p", dp);
 			return 1;
 		}
 		namelen_sum += sfe->namelen;
@@ -569,7 +569,7 @@ xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp)
 	}
 	if (namelen_sum >= XFS_LITINO(mp)) {
 		xfs_fs_cmn_err(CE_WARN, mp,
-			"Invalid shortform namelen: dp 0x%p\n", dp);
+			"Invalid shortform namelen: dp 0x%p", dp);
 		return 1;
 	}
 
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index 8c06f2fa1251..d689c169f6e7 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -105,7 +105,7 @@ xfs_dir_ino_validate(xfs_mount_t *mp, xfs_ino_t ino)
 		XFS_AGINO_TO_INO(mp, agno, agino) == ino;
 	if (XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
 			XFS_RANDOM_DIR_INO_VALIDATE)) {
-		xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx\n",
+		xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx",
 				(unsigned long long) ino);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 5e1e059f8fb0..823aa2913e75 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -89,7 +89,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression,
 	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
 		if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) {
 			cmn_err(CE_WARN,
-	"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"\n",
+	"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
 				expression, file, line, xfs_etest_fsname[i]);
 			return 1;
 		}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cbb6ec5d0321..aac21b2532a6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -353,7 +353,7 @@ xfs_growfs_data_private(
 				  sectbb, 0, &bp);
 		if (error) {
 			xfs_fs_cmn_err(CE_WARN, mp,
-			"error %d reading secondary superblock for ag %d\n",
+			"error %d reading secondary superblock for ag %d",
 				error, agno);
 			break;
 		}
@@ -368,7 +368,7 @@ xfs_growfs_data_private(
 			continue;
 		} else {
 			xfs_fs_cmn_err(CE_WARN, mp,
-		"write error %d updating secondary superblock for ag %d\n",
+		"write error %d updating secondary superblock for ag %d",
 				error, agno);
 			break; /* no point in continuing */
 		}
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b4eb0200e7bd..52c1b10c2afd 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -245,7 +245,6 @@ again:
 				cmn_err(CE_PANIC,
 			"xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
 						inode_vp, vp);
-				BUG();
 			}
 
 			read_unlock(&ih->ih_lock);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 5115627ae9e2..6fbcb84d584f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2064,7 +2064,7 @@ xlog_state_do_callback(
 		} while (first_iclog != iclog);
 		if (repeats && (repeats % 10) == 0) {
 			xfs_fs_cmn_err(CE_WARN, log->l_mp,
-				"xlog_state_do_callback: looping %d\n", repeats);
+				"xlog_state_do_callback: looping %d", repeats);
 		}
 	} while (!ioerrors && loopdidcallbacks);
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 558cec451a47..125af631e3f2 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -476,7 +476,7 @@ xlog_find_head(xlog_t  *log,
 	     * mkfs etc write a dummy unmount record to a fresh
 	     * log so we can store the uuid in there
 	     */
-	    xlog_warn("XFS: totally zeroed log\n");
+	    xlog_warn("XFS: totally zeroed log");
 	}
 
 	return 0;
@@ -3142,7 +3142,7 @@ xlog_unpack_data(xlog_rec_header_t *rhead,
 "XFS: Disregard message if filesystem was created with non-DEBUG kernel");
 		    if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
 			    cmn_err(CE_DEBUG,
-				"XFS: LogR this is a LogV2 filesystem\n");
+				"XFS: LogR this is a LogV2 filesystem");
 		    }
 		    log->l_flags |= XLOG_CHKSUM_MISMATCH;
 	    }
@@ -3619,7 +3619,7 @@ xlog_recover_finish(xlog_t *log, int mfsi_flags)
 		log->l_flags &= ~XLOG_RECOVERY_NEEDED;
 	} else {
 		cmn_err(CE_DEBUG,
-			"!Ending clean XFS mount for filesystem: %s\n",
+			"!Ending clean XFS mount for filesystem: %s",
 			log->l_mp->m_fsname);
 	}
 	return 0;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 2256b5d8bb0b..b728d271c7ee 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1039,14 +1039,14 @@ xfs_qm_unmount(
 		vp = XFS_ITOV(XFS_QI_UQIP(mp));
 		VN_RELE(vp);
 		if (vn_count(vp) > 1)
-			cmn_err(CE_WARN, "UQUOTA busy vp=0x%x count=%d\n",
+			cmn_err(CE_WARN, "UQUOTA busy vp=0x%x count=%d",
 				vp, vn_count(vp));
 	}
 	if (XFS_IS_GQUOTA_ON(mp)) {
 		vp = XFS_ITOV(XFS_QI_GQIP(mp));
 		VN_RELE(vp);
 		if (vn_count(vp) > 1)
-			cmn_err(CE_WARN, "GQUOTA busy vp=0x%x count=%d\n",
+			cmn_err(CE_WARN, "GQUOTA busy vp=0x%x count=%d",
 				vp, vn_count(vp));
 	}
 
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 35978e54d75d..01dc65dc1158 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -230,7 +230,7 @@ xfs_ioerror_alert(
 	xfs_daddr_t		blkno)
 {
 	cmn_err(CE_ALERT,
- "I/O error in filesystem (\"%s\") meta-data dev 0x%x block 0x%llx\n"
+ "I/O error in filesystem (\"%s\") meta-data dev 0x%x block 0x%llx"
  "	 (\"%s\") error %d buf count %u",
 		(!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
 		XFS_BUF_TARGET_DEV(bp),
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index f187135a1cef..8daceb174c4f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -472,7 +472,7 @@ shutdown_abort:
 	 */
 #if defined(DEBUG)
 	if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
-		cmn_err(CE_NOTE, "about to pop assert, bp == 0x%x\n", bp);
+		cmn_err(CE_NOTE, "about to pop assert, bp == 0x%x", bp);
 #endif
 	ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) !=
 						(XFS_B_STALE|XFS_B_DELWRI));
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index bcd2efd1ce0d..60213a9540b6 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -225,7 +225,7 @@ xfs_start_flags(
 	    (ap->logbufs < XLOG_NUM_ICLOGS ||
 	     ap->logbufs > XLOG_MAX_ICLOGS)) {
 		cmn_err(CE_WARN, 
-			"XFS: invalid logbufs value: %d [not %d-%d]\n",
+			"XFS: invalid logbufs value: %d [not %d-%d]",
 			ap->logbufs, XLOG_NUM_ICLOGS, XLOG_MAX_ICLOGS);
 		return XFS_ERROR(EINVAL);
 	}
@@ -237,7 +237,7 @@ xfs_start_flags(
 	    ap->logbufsize != 128 * 1024 &&
 	    ap->logbufsize != 256 * 1024) {
 		cmn_err(CE_WARN,
-	"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]\n",
+	"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
 			ap->logbufsize);
 		return XFS_ERROR(EINVAL);
 	}
-- 
cgit v1.2.3


From 9318cd45c480ef02ab9a73d45ce426c44c7cfd53 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 15 Oct 2002 03:24:14 +0200
Subject: XFS: More XFS debug-related fixes

Modid: 2.5.x-xfs:slinx:129773a
---
 fs/xfs/linux/xfs_globals.c |   1 +
 fs/xfs/xfs_alloc.c         |   8 +-
 fs/xfs/xfsidbg.c           | 212 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 208 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_globals.c b/fs/xfs/linux/xfs_globals.c
index e831422a7f7b..7f0ac30a83ba 100644
--- a/fs/xfs/linux/xfs_globals.c
+++ b/fs/xfs/linux/xfs_globals.c
@@ -64,4 +64,5 @@ EXPORT_SYMBOL(xfs_Gqm);
 EXPORT_SYMBOL(xfs_next_bit);
 EXPORT_SYMBOL(xfs_contig_bits);
 EXPORT_SYMBOL(xfs_bmbt_get_all);
+EXPORT_SYMBOL(xfs_bmbt_disk_get_all);
 
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 6a617c57be51..82f51bf82393 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -494,13 +494,13 @@ xfs_alloc_trace_modagf(
 		(void *)(__psunsigned_t)INT_GET(agf->agf_seqno, ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_length, ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_BNO],
-						ARCH_CONVERT);
+						ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_CNT],
-						ARCH_CONVERT);
+						ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_BNO],
-						ARCH_CONVERT);
+						ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_CNT],
-						ARCH_CONVERT);
+						ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_flfirst, ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_fllast, ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_flcount, ARCH_CONVERT),
diff --git a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
index b762d3812954..22565c77f3f0 100644
--- a/fs/xfs/xfsidbg.c
+++ b/fs/xfs/xfsidbg.c
@@ -53,6 +53,9 @@ static void	xfsidbg_xagf(xfs_agf_t *);
 static void	xfsidbg_xagi(xfs_agi_t *);
 static void	xfsidbg_xaildump(xfs_mount_t *);
 static void	xfsidbg_xalloc(xfs_alloc_arg_t *);
+#ifdef DEBUG
+static void	xfsidbg_xalmtrace(xfs_mount_t *);
+#endif
 static void	xfsidbg_xattrcontext(xfs_attr_list_context_t *);
 static void	xfsidbg_xattrleaf(xfs_attr_leafblock_t *);
 static void	xfsidbg_xattrsf(xfs_attr_shortform_t *);
@@ -196,6 +199,29 @@ static int	kdbm_xfs_xalloc(
 	return 0;
 }
 
+#ifdef DEBUG
+static int	kdbm_xfs_xalmtrace(
+	int	argc,
+	const char **argv,
+	const char **envp,
+	struct pt_regs *regs)
+{
+	unsigned long addr;
+	int nextarg = 1;
+	long offset = 0;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+	if (diag)
+		return diag;
+
+	xfsidbg_xalmtrace((xfs_mount_t *) addr);
+	return 0;
+}
+#endif /* DEBUG */
+
 static int	kdbm_xfs_xattrcontext(
 	int	argc,
 	const char **argv,
@@ -1706,7 +1732,7 @@ static char	*pb_flag_vals[] = {
 /* 15 */ "FILE_ALLOCATE", "DONT_BLOCK", "DIRECT", "INVALID18", "LOCKABLE",
 /* 20 */ "PRIVATE_BH", "ALL_PAGES_MAPPED", "ADDR_ALLOCATED", "MEM_ALLOCATED",
 	 "FORCEIO",
-/* 25 */ "FLUSH", "READ_AHEAD", "INVALID27", "INVALID28", "INVALID29", 
+/* 25 */ "FLUSH", "READ_AHEAD", "INVALID27", "INVALID28", "INVALID29",
 /* 30 */ "INVALID30", "INVALID31",
 	 NULL };
 
@@ -1965,7 +1991,7 @@ pb_trace_core(
 
 		if ((trace->event < EV_SIZE-1) && event_names[trace->event]) {
 			event = event_names[trace->event];
-		} else if (trace->event == EV_SIZE) {
+		} else if (trace->event == EV_SIZE-1) {
 			event = (char *)trace->misc;
 		} else {
 			event = value;
@@ -2082,6 +2108,10 @@ static struct xif {
 				"Dump XFS AIL for a mountpoint" },
   {  "xalloc",	kdbm_xfs_xalloc,	"<xfs_alloc_arg_t>",
 				"Dump XFS allocation args structure" },
+#ifdef DEBUG
+  {  "xalmtrc",	kdbm_xfs_xalmtrace,	"<xfs_mount_t>",
+				"Dump XFS alloc mount-point trace" },
+#endif
   {  "xattrcx", kdbm_xfs_xattrcontext,	"<xfs_attr_list_context_t>",
 				"Dump XFS attr_list context struct"},
   {  "xattrlf", kdbm_xfs_xattrleaf,	"<xfs_attr_leafblock_t>",
@@ -2246,6 +2276,9 @@ static char *xfs_alloctype[] = {
 /*
  * Prototypes for static functions.
  */
+#ifdef DEBUG
+static int xfs_alloc_trace_entry(ktrace_entry_t *ktep);
+#endif
 static void xfs_broot(xfs_inode_t *ip, xfs_ifork_t *f);
 static void xfs_btalloc(xfs_alloc_block_t *bt, int bsz);
 static void xfs_btbmap(xfs_bmbt_block_t *bt, int bsz);
@@ -2276,6 +2309,137 @@ static void xfs_xnode_fork(char *name, xfs_ifork_t *f);
  * Static functions.
  */
 
+#ifdef DEBUG
+/*
+ * Print xfs alloc trace buffer entry.
+ */
+static int
+xfs_alloc_trace_entry(ktrace_entry_t *ktep)
+{		  
+	static char *modagf_flags[] = {
+		"magicnum",
+		"versionnum",
+		"seqno",
+		"length",
+		"roots",
+		"levels",
+		"flfirst",
+		"fllast",
+		"flcount",
+		"freeblks",
+		"longest",
+		NULL
+	};
+
+	if (((__psint_t)ktep->val[0] & 0xffff) == 0)
+		return 0;
+	switch ((long)ktep->val[0] & 0xffffL) {
+	case XFS_ALLOC_KTRACE_ALLOC:
+		kdb_printf("alloc %s[%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		kdb_printf(
+	"agno %d agbno %d minlen %d maxlen %d mod %d prod %d minleft %d\n",
+			(__psunsigned_t)ktep->val[4],
+			(__psunsigned_t)ktep->val[5],
+			(__psunsigned_t)ktep->val[6], 
+			(__psunsigned_t)ktep->val[7], 
+			(__psunsigned_t)ktep->val[8],
+			(__psunsigned_t)ktep->val[9], 
+			(__psunsigned_t)ktep->val[10]);
+		kdb_printf("total %d alignment %d len %d type %s otype %s\n",
+			(__psunsigned_t)ktep->val[11],
+			(__psunsigned_t)ktep->val[12],
+			(__psunsigned_t)ktep->val[13],
+			xfs_alloctype[((__psint_t)ktep->val[14]) >> 16],
+			xfs_alloctype[((__psint_t)ktep->val[14]) & 0xffff]);
+		kdb_printf("wasdel %d wasfromfl %d isfl %d userdata %d\n",
+			((__psint_t)ktep->val[15] & (1 << 3)) != 0,
+			((__psint_t)ktep->val[15] & (1 << 2)) != 0,
+			((__psint_t)ktep->val[15] & (1 << 1)) != 0,
+			((__psint_t)ktep->val[15] & (1 << 0)) != 0);
+		break;
+	case XFS_ALLOC_KTRACE_FREE:
+		kdb_printf("free %s[%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		kdb_printf("agno %d agbno %d len %d isfl %d\n",
+			(__psunsigned_t)ktep->val[4],
+			(__psunsigned_t)ktep->val[5],
+			(__psunsigned_t)ktep->val[6],
+			(__psint_t)ktep->val[7]);
+		break;
+	case XFS_ALLOC_KTRACE_MODAGF:
+		kdb_printf("modagf %s[%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		printflags((__psint_t)ktep->val[4], modagf_flags, "modified");
+		kdb_printf("seqno %d length %d roots b %d c %d\n",
+			(__psunsigned_t)ktep->val[5],
+			(__psunsigned_t)ktep->val[6],
+			(__psunsigned_t)ktep->val[7],
+			(__psunsigned_t)ktep->val[8]);
+		kdb_printf("levels b %d c %d flfirst %d fllast %d flcount %d\n",
+			(__psunsigned_t)ktep->val[9],
+			(__psunsigned_t)ktep->val[10],
+			(__psunsigned_t)ktep->val[11],
+			(__psunsigned_t)ktep->val[12],
+			(__psunsigned_t)ktep->val[13]);
+		kdb_printf("freeblks %d longest %d\n",
+			(__psunsigned_t)ktep->val[14],
+			(__psunsigned_t)ktep->val[15]);
+		break;
+
+	case XFS_ALLOC_KTRACE_UNBUSY:
+		kdb_printf("unbusy %s [%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		kdb_printf("      agno %d slot %d tp 0x%x\n",
+			(__psunsigned_t)ktep->val[4],
+			(__psunsigned_t)ktep->val[7],
+			(__psunsigned_t)ktep->val[8]);
+		break;
+	case XFS_ALLOC_KTRACE_BUSY:
+		kdb_printf("busy %s [%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		kdb_printf("      agno %d agbno %d len %d slot %d tp 0x%x\n",
+			(__psunsigned_t)ktep->val[4],
+			(__psunsigned_t)ktep->val[5],
+			(__psunsigned_t)ktep->val[6],
+			(__psunsigned_t)ktep->val[7],
+			(__psunsigned_t)ktep->val[8]);
+		break;
+	case XFS_ALLOC_KTRACE_BUSYSEARCH:
+		kdb_printf("busy-search %s [%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		kdb_printf("      agno %d agbno %d len %d slot %d tp 0x%x\n",
+			(__psunsigned_t)ktep->val[4],
+			(__psunsigned_t)ktep->val[5],
+			(__psunsigned_t)ktep->val[6],
+			(__psunsigned_t)ktep->val[7],
+			(__psunsigned_t)ktep->val[8]);
+		break;
+	default:
+		kdb_printf("unknown alloc trace record\n");
+		break;
+	}
+	return 1;
+}
+#endif /* DEBUG */
 
 /*
  * Print an xfs in-inode bmap btree root.
@@ -3000,7 +3164,33 @@ xfsidbg_xalloc(xfs_alloc_arg_t *args)
 		args->wasfromfl, args->isfl, args->userdata);
 }
 
+#ifdef DEBUG
+/*
+ * Print out all the entries in the alloc trace buf corresponding
+ * to the given mount point.
+ */
+static void
+xfsidbg_xalmtrace(xfs_mount_t *mp)
+{
+	ktrace_entry_t	*ktep;
+	ktrace_snap_t	kts;
+	extern ktrace_t	*xfs_alloc_trace_buf;
 
+	if (xfs_alloc_trace_buf == NULL) {
+		kdb_printf("The xfs alloc trace buffer is not initialized\n");
+		return;
+	}
+
+	ktep = ktrace_first(xfs_alloc_trace_buf, &kts);
+	while (ktep != NULL) {
+		if ((__psint_t)ktep->val[0] && (xfs_mount_t *)ktep->val[3] == mp) {
+			(void)xfs_alloc_trace_entry(ktep);
+			kdb_printf("\n");
+		}
+		ktep = ktrace_next(xfs_alloc_trace_buf, &kts);
+	}
+}
+#endif /* DEBUG */
 
 /*
  * Print an attr_list() context structure.
@@ -4701,13 +4891,17 @@ xfsidbg_xperag(xfs_mount_t *mp)
 		if (pag->pagi_init)
 			kdb_printf("	i_freecount %d i_inodeok %d\n",
 				pag->pagi_freecount, pag->pagi_inodeok);
-
-		for (busy = 0; busy < XFS_PAGB_NUM_SLOTS; busy++) {
-			kdb_printf("	 %04d: start %d length %d tp 0x%p\n",
-				busy,
-				pag->pagb_list[busy].busy_start,
-				pag->pagb_list[busy].busy_length,
-				pag->pagb_list[busy].busy_tp);
+		if (pag->pagf_init) {
+			for (busy = 0; busy < XFS_PAGB_NUM_SLOTS; busy++) {
+				if (pag->pagb_list[busy].busy_length != 0) {
+					kdb_printf(
+		"	 %04d: start %d length %d tp 0x%p\n",
+					    busy,
+					    pag->pagb_list[busy].busy_start,
+					    pag->pagb_list[busy].busy_length,
+					    pag->pagb_list[busy].busy_tp);
+				}
+			}
 		}
 	}
 }
-- 
cgit v1.2.3


From 7e537f6c4d7c0299e9f1a2e7b99a9794a82b52ab Mon Sep 17 00:00:00 2001
From: Stephen Lord <lord@sgi.com>
Date: Tue, 15 Oct 2002 03:26:56 +0200
Subject: XFS: Fix a couple of nasty log problems

When a transaction crosses multiple iclogs, the async transaction code
needs to force the log up until the last iclog.  We need to record this
lsn in the transaction so we can do a log flush on it.

Secondly, there was a sleep/wakeup pair between flushing the log and
log write completions which was a) incorrect, and  b) no longer needed.
This could result in early wakeups of threads waiting for log flushes.

Modid: 2.5.x-xfs:slinx:129778a
---
 fs/xfs/xfs_alloc.c |  2 +-
 fs/xfs/xfs_log.c   | 48 ++++++------------------------------------------
 fs/xfs/xfs_trans.c |  1 +
 fs/xfs/xfs_trans.h |  5 ++++-
 fs/xfs/xfsidbg.c   |  6 ++++--
 5 files changed, 16 insertions(+), 46 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 82f51bf82393..5c89a956c866 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2626,7 +2626,7 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 	 */
 	if (cnt) {
 		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp);
-		lsn = bsy->busy_tp->t_lsn;
+		lsn = bsy->busy_tp->t_commit_lsn;
 		mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
 		xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
 	} else {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 6fbcb84d584f..d156b9cb7a7f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2155,13 +2155,6 @@ xlog_state_done_syncing(
 		iclog->ic_state = XLOG_STATE_DONE_SYNC;
 	}
 
-	/*
-	 * Someone could be sleeping on the next iclog even though it is
-	 * in the ACTIVE state.	 We kick off one thread to force the
-	 * iclog buffer out.
-	 */
-	if (iclog->ic_next->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
-		sv_signal(&iclog->ic_next->ic_forcesema);
 	LOG_UNLOCK(log, s);
 	xlog_state_do_callback(log, aborted, iclog);	/* also cleans log */
 }	/* xlog_state_done_syncing */
@@ -2984,11 +2977,9 @@ xlog_state_sync(xlog_t	  *log,
 		uint	  flags)
 {
     xlog_in_core_t	*iclog;
-    int			already_slept = 0;
     SPLDECL(s);
 
 
-try_again:
     s = LOG_LOCK(log);
     iclog = log->l_iclog;
 
@@ -3009,39 +3000,12 @@ try_again:
 	}
 
 	if (iclog->ic_state == XLOG_STATE_ACTIVE) {
-		/*
-		 * We sleep here if we haven't already slept (e.g.
-		 * this is the first time we've looked at the correct
-		 * iclog buf) and the buffer before us is going to
-		 * be sync'ed.	We have to do that to ensure that the
-		 * log records go out in the proper order.  When it's
-		 * done, someone waiting on this buffer will be woken up
-		 * (maybe us) to flush this buffer out.
-		 *
-		 * Otherwise, we mark the buffer WANT_SYNC, and bump
-		 * up the refcnt so we can release the log (which drops
-		 * the ref count).  The state switch keeps new transaction
-		 * commits from using this buffer.  When the current commits
-		 * finish writing into the buffer, the refcount will drop to
-		 * zero and the buffer will go out then.
-		 */
-		if (!already_slept &&
-		    (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC |
-						 XLOG_STATE_SYNCING))) {
-			ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
-			XFS_STATS_INC(xfsstats.xs_log_force_sleep);
-			sv_wait(&iclog->ic_prev->ic_forcesema, PSWP,
-				&log->l_icloglock, s);
-			already_slept = 1;
-			goto try_again;
-		} else {
-			iclog->ic_refcnt++;
-			xlog_state_switch_iclogs(log, iclog, 0);
-			LOG_UNLOCK(log, s);
-			if (xlog_state_release_iclog(log, iclog))
-				return XFS_ERROR(EIO);
-			s = LOG_LOCK(log);
-		}
+		iclog->ic_refcnt++;
+		xlog_state_switch_iclogs(log, iclog, 0);
+		LOG_UNLOCK(log, s);
+		if (xlog_state_release_iclog(log, iclog))
+			return XFS_ERROR(EIO);
+		s = LOG_LOCK(log);
 	}
 
 	if ((flags & XFS_LOG_SYNC) && /* sleep */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 292ad257bc38..6e8dd532c1dc 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -788,6 +788,7 @@ shut_us_down:
 	commit_lsn = xfs_log_done(mp, tp->t_ticket, log_flags);
 #endif
 
+	tp->t_commit_lsn = commit_lsn;
 	if (nvec > XFS_TRANS_LOGVEC_COUNT) {
 		kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t));
 	}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 1845dd874a4b..5489e4dded40 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -378,7 +378,10 @@ typedef struct xfs_trans {
 	unsigned int		t_rtx_res_used; /* # of resvd rt extents used */
 	xfs_log_ticket_t	t_ticket;	/* log mgr ticket */
 	sema_t			t_sema;		/* sema for commit completion */
-	xfs_lsn_t		t_lsn;		/* log seq num of trans commit*/
+	xfs_lsn_t		t_lsn;		/* log seq num of start of
+						 * transaction. */
+	xfs_lsn_t		t_commit_lsn;	/* log seq num of end of 
+						 * transaction. */
 	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
 	struct xfs_dquot_acct	*t_dqinfo;	/* accting info for dquots */
 	xfs_trans_callback_t	t_callback;	/* transaction callback */
diff --git a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
index 22565c77f3f0..f4d1a6b4951e 100644
--- a/fs/xfs/xfsidbg.c
+++ b/fs/xfs/xfsidbg.c
@@ -5299,8 +5299,10 @@ xfsidbg_xtp(xfs_trans_t *tp)
 		tp->t_log_res, tp->t_blk_res, tp->t_blk_res_used);
 	kdb_printf("rt res %d rt res used %d\n", tp->t_rtx_res,
 		tp->t_rtx_res_used);
-	kdb_printf("ticket 0x%lx lsn %s\n",
-		(unsigned long) tp->t_ticket, xfs_fmtlsn(&tp->t_lsn));
+	kdb_printf("ticket 0x%lx lsn %s commit_lsn %s\n",
+		(unsigned long) tp->t_ticket,
+		xfs_fmtlsn(&tp->t_lsn),
+		xfs_fmtlsn(&tp->t_commit_lsn));
 	kdb_printf("callback 0x%p callarg 0x%p\n",
 		tp->t_callback, tp->t_callarg);
 	kdb_printf("icount delta %ld ifree delta %ld\n",
-- 
cgit v1.2.3


From 2243186ad32b8ff152f9fa67085ed85f3916a48c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 03:29:02 +0200
Subject: XFS: Revert VMAP() to the old IRIX prototype

Modid: 2.5.x-xfs:slinx:129878a
---
 fs/xfs/linux/xfs_vnode.c | 3 +--
 fs/xfs/linux/xfs_vnode.h | 7 ++++---
 fs/xfs/xfs_inode.c       | 2 +-
 fs/xfs/xfs_mount.c       | 4 ++--
 fs/xfs/xfs_qm_syscalls.c | 2 +-
 fs/xfs/xfs_quota_priv.h  | 2 +-
 fs/xfs/xfs_rtalloc.c     | 2 +-
 fs/xfs/xfs_vfsops.c      | 2 +-
 8 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_vnode.c b/fs/xfs/linux/xfs_vnode.c
index 95386e4a9729..133880636f54 100644
--- a/fs/xfs/linux/xfs_vnode.c
+++ b/fs/xfs/linux/xfs_vnode.c
@@ -397,8 +397,7 @@ vn_remove(struct vnode *vp)
 	 * After the following purge the vnode
 	 * will no longer exist.
 	 */
-	VMAP(vp, XFS_BHVTOI(vp->v_fbhv), vmap);
-
+	VMAP(vp, vmap);
 	vn_purge(vp, &vmap);
 }
 
diff --git a/fs/xfs/linux/xfs_vnode.h b/fs/xfs/linux/xfs_vnode.h
index 97443cb9ffa0..dc76f19ca268 100644
--- a/fs/xfs/linux/xfs_vnode.h
+++ b/fs/xfs/linux/xfs_vnode.h
@@ -629,9 +629,10 @@ typedef struct vnode_map {
 	xfs_ino_t	v_ino;			/* inode #	*/
 } vmap_t;
 
-#define VMAP(vp, ip, vmap)	{(vmap).v_vfsp	 = (vp)->v_vfsp,	\
-				 (vmap).v_number = (vp)->v_number,	\
-				 (vmap).v_ino	 = (ip)->i_ino; }
+#define VMAP(vp, vmap)	{(vmap).v_vfsp	 = (vp)->v_vfsp,	\
+			 (vmap).v_number = (vp)->v_number,	\
+			 (vmap).v_ino	 = (vp)->v_inode.i_ino; }
+
 extern void	vn_purge(struct vnode *, vmap_t *);
 extern vnode_t	*vn_get(struct vnode *, vmap_t *);
 extern int	vn_revalidate(struct vnode *);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 0ee8eee14756..bd343063b60d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3380,7 +3380,7 @@ xfs_iflush_all(
 			 * entry in the list anyway so we'll know below
 			 * whether we reached the end or not.
 			 */
-			VMAP(vp, ip, vmap);
+			VMAP(vp, vmap);
 			vp->v_flag |= VPURGE;		/* OK for vn_purge */
 			XFS_MOUNT_IUNLOCK(mp);
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2d0203b6836f..2625226c73c7 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -911,7 +911,7 @@ xfs_mountfs(
 	rvp = XFS_ITOV(rip);
 	if ((rip->i_d.di_mode & IFMT) != IFDIR) {
 		cmn_err(CE_WARN, "XFS: corrupted root inode");
-		VMAP(rvp, rip, vmap);
+		VMAP(rvp, vmap);
 		prdev("Root inode %llu is not a directory",
 		      mp->m_dev, (unsigned long long)rip->i_ino);
 		rvp->v_flag |= VPURGE;
@@ -958,7 +958,7 @@ xfs_mountfs(
 		 */
 		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
 		rvp->v_flag |= VPURGE;
-		VMAP(rvp, rip, vmap);
+		VMAP(rvp, vmap);
 		VN_RELE(rvp);
 		vn_purge(rvp, &vmap);
 		goto error3;
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 976aed6b46dd..b3cfba7ecad0 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -1060,7 +1060,7 @@ again:
 			 * Sample vp mapping while holding the mplock, lest
 			 * we come across a non-existent vnode.
 			 */
-			VMAP(vp, ip, vmap);
+			VMAP(vp, vmap);
 			ireclaims = mp->m_ireclaims;
 			topino = mp->m_inodes;
 			XFS_MOUNT_IUNLOCK(mp);
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
index c7f8bb60e561..d016d2b6cf8a 100644
--- a/fs/xfs/xfs_quota_priv.h
+++ b/fs/xfs/xfs_quota_priv.h
@@ -181,7 +181,7 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
 	  vmap_t dqvmap;		\
 	  vnode_t *dqvp;		\
 	  dqvp = XFS_ITOV(ip);		\
-	  VMAP(dqvp, ip, dqvmap);	\
+	  VMAP(dqvp, dqvmap);		\
 	  VN_RELE(dqvp);		\
 	}
 
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6ddf6e0035d5..7f5526f4417c 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2322,7 +2322,7 @@ xfs_rtmount_inodes(
 		vmap_t		vmap;		/* vmap to delete vnode */
 
 		rbmvp = XFS_ITOV(mp->m_rbmip);
-		VMAP(rbmvp, mp->m_rbmip, vmap);
+		VMAP(rbmvp, vmap);
 		VN_RELE(rbmvp);
 		vn_purge(rbmvp, &vmap);
 		return error;
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 60213a9540b6..97f6f65feca6 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1115,7 +1115,7 @@ xfs_syncsub(
 			 * in taking a snapshot of the vnode version number
 			 * for use in calling vn_get().
 			 */
-			VMAP(vp, ip, vmap);
+			VMAP(vp, vmap);
 			IPOINTER_INSERT(ip, mp);
 
 			vp = vn_get(vp, &vmap);
-- 
cgit v1.2.3


From 32a9dde1fe9114ae2770d4b510c4b124137b84d3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 15 Oct 2002 03:30:01 +0200
Subject: XFS: Switch from iget_locked to ilookup in vn_get

We only want to get the inode if it actually is present in core.  The
new ilookup function allows us to do this easily instead of having to
tear down the wrongly allocated inode again if it wasn't in memory.

Modid: 2.5.x-xfs:slinx:129883a
---
 fs/xfs/linux/xfs_vnode.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux/xfs_vnode.c b/fs/xfs/linux/xfs_vnode.c
index 133880636f54..51a855c9a4a0 100644
--- a/fs/xfs/linux/xfs_vnode.c
+++ b/fs/xfs/linux/xfs_vnode.c
@@ -179,20 +179,10 @@ vn_get(struct vnode *vp, vmap_t *vmap)
 	if (inode->i_state & I_FREEING)
 		return NULL;
 
-	inode = iget_locked(vmap->v_vfsp->vfs_super, vmap->v_ino);
+	inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
 	if (inode == NULL)		/* Inode not present */
 		return NULL;
 
-	/* We do not want to create new inodes via vn_get,
-	 * returning NULL here is OK.
-	 */
-	if (inode->i_state & I_NEW) {
-		make_bad_inode(inode);
-		unlock_new_inode(inode);
-		iput(inode);
-		return NULL;
-	}
-
 	vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
 	ASSERT((vp->v_flag & VPURGE) == 0);
 
-- 
cgit v1.2.3


From 90df68e70b631886169c9287faebf2742f43484c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@tove.transmeta.com>
Date: Mon, 14 Oct 2002 21:24:37 -0700
Subject: Block layer ioctl cleanups.

Rename the old "block_ioctl()" function to "scsi_cmd_ioctl()", as that
is what the function does. Rename the whole file to "scsi_ioctl.c".
---
 drivers/block/Makefile      |   4 +-
 drivers/block/blkpg.c       |   2 +-
 drivers/block/block_ioctl.c |  83 -----------------
 drivers/block/scsi_ioctl.c  | 215 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/ide/ide.c           |   2 +-
 fs/block_dev.c              |  41 +--------
 include/linux/blkdev.h      |  12 ++-
 7 files changed, 233 insertions(+), 126 deletions(-)
 delete mode 100644 drivers/block/block_ioctl.c
 create mode 100644 drivers/block/scsi_ioctl.c

(limited to 'fs')

diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index eff7ee947ea7..8457b1bfa13a 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -9,9 +9,9 @@
 #
 
 export-objs	:= elevator.o ll_rw_blk.o loop.o genhd.o acsi.o \
-		   block_ioctl.o deadline-iosched.o
+		   scsi_ioctl.o deadline-iosched.o
 
-obj-y	:= elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o deadline-iosched.o
+obj-y	:= elevator.o ll_rw_blk.o blkpg.o genhd.o scsi_ioctl.o deadline-iosched.o
 
 obj-$(CONFIG_MAC_FLOPPY)	+= swim3.o
 obj-$(CONFIG_BLK_DEV_FD)	+= floppy.o
diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c
index d5ba72a8ac86..7b55729fa29a 100644
--- a/drivers/block/blkpg.c
+++ b/drivers/block/blkpg.c
@@ -305,6 +305,6 @@ int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg)
 			return 0;
 
 		default:
-			return -EINVAL;
+			return -ENOTTY;
 	}
 }
diff --git a/drivers/block/block_ioctl.c b/drivers/block/block_ioctl.c
deleted file mode 100644
index edde76503d60..000000000000
--- a/drivers/block/block_ioctl.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public Licens
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
- *
- */
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/config.h>
-#include <linux/swap.h>
-#include <linux/init.h>
-#include <linux/smp_lock.h>
-#include <linux/module.h>
-#include <linux/blk.h>
-#include <linux/completion.h>
-
-#include <linux/cdrom.h>
-
-int blk_do_rq(request_queue_t *q, struct request *rq)
-{
-	DECLARE_COMPLETION(wait);
-	int err = 0;
-
-	rq->flags |= REQ_NOMERGE;
-	rq->waiting = &wait;
-	elv_add_request(q, rq, 1);
-	generic_unplug_device(q);
-	wait_for_completion(&wait);
-
-	/*
-	 * for now, never retry anything
-	 */
-	if (rq->errors)
-		err = -EIO;
-
-	return err;
-}
-
-int block_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg)
-{
-	request_queue_t *q;
-	struct request *rq;
-	int close = 0, err;
-
-	q = bdev_get_queue(bdev);
-	if (!q)
-		return -ENXIO;
-
-	switch (cmd) {
-		case CDROMCLOSETRAY:
-			close = 1;
-		case CDROMEJECT:
-			rq = blk_get_request(q, WRITE, __GFP_WAIT);
-			rq->flags = REQ_BLOCK_PC;
-			memset(rq->cmd, 0, sizeof(rq->cmd));
-			rq->cmd[0] = GPCMD_START_STOP_UNIT;
-			rq->cmd[4] = 0x02 + (close != 0);
-			err = blk_do_rq(q, rq);
-			blk_put_request(rq);
-			break;
-		default:
-			err = -ENOTTY;
-	}
-
-	blk_put_queue(q);
-	return err;
-}
-
-EXPORT_SYMBOL(block_ioctl);
diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c
new file mode 100644
index 000000000000..c26646e578da
--- /dev/null
+++ b/drivers/block/scsi_ioctl.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/config.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+#include <linux/blk.h>
+#include <linux/completion.h>
+#include <linux/cdrom.h>
+#include <linux/slab.h>
+
+#include <scsi/scsi.h>
+
+#include <asm/uaccess.h>
+
+int blk_do_rq(request_queue_t *q, struct request *rq)
+{
+	DECLARE_COMPLETION(wait);
+	int err = 0;
+
+	rq->flags |= REQ_NOMERGE;
+	rq->waiting = &wait;
+	elv_add_request(q, rq, 1);
+	generic_unplug_device(q);
+	wait_for_completion(&wait);
+
+	/*
+	 * for now, never retry anything
+	 */
+	if (rq->errors)
+		err = -EIO;
+
+	return err;
+}
+
+#include <scsi/sg.h>
+
+static int sg_get_version(int *p)
+{
+	static int sg_version_num = 30527;
+	return put_user(sg_version_num, p);
+}
+
+static int scsi_get_idlun(request_queue_t *q, int *p)
+{
+	return put_user(0, p);
+}
+
+static int scsi_get_bus(request_queue_t *q, int *p)
+{
+	return put_user(0, p);
+}
+
+static int sg_get_timeout(request_queue_t *q)
+{
+	return HZ;
+}
+
+static int sg_set_timeout(request_queue_t *q, int *p)
+{
+	int timeout;
+	int error = get_user(timeout, p);
+	return error;
+}
+
+static int reserved_size = 0;
+
+static int sg_get_reserved_size(request_queue_t *q, int *p)
+{
+	return put_user(reserved_size, p);
+}
+
+static int sg_set_reserved_size(request_queue_t *q, int *p)
+{
+	int size;
+	int error = get_user(size, p);
+	if (!error)
+		reserved_size = size;
+	return error;
+}
+
+static int sg_emulated_host(request_queue_t *q, int *p)
+{
+	return put_user(1, p);
+}
+
+static int sg_io(request_queue_t *q, struct sg_io_hdr *uptr)
+{
+	int i, err;
+	struct sg_io_hdr hdr;
+	struct request *rq;
+	void *buffer;
+
+	if (!access_ok(VERIFY_WRITE, uptr, sizeof(*uptr)))
+		return -EFAULT;
+	if (copy_from_user(&hdr, uptr, sizeof(*uptr)))
+		return -EFAULT;
+
+	if ( hdr.cmd_len > sizeof(rq->cmd) )
+		return -EINVAL;
+
+	buffer = NULL;
+	if (hdr.dxfer_len) {
+		unsigned int bytes = (hdr.dxfer_len + 511) & ~511;
+
+		switch (hdr.dxfer_direction) {
+		default:
+			return -EINVAL;
+		case SG_DXFER_TO_DEV:
+		case SG_DXFER_FROM_DEV:
+		case SG_DXFER_TO_FROM_DEV:
+			break;
+		}
+		buffer = kmalloc(bytes, GFP_USER);
+		if (!buffer)
+			return -ENOMEM;
+		if (hdr.dxfer_direction == SG_DXFER_TO_DEV ||
+		    hdr.dxfer_direction == SG_DXFER_TO_FROM_DEV)
+			copy_from_user(buffer, hdr.dxferp, hdr.dxfer_len);
+	}
+
+	rq = blk_get_request(q, WRITE, __GFP_WAIT);
+	rq->timeout = 60*HZ;
+	rq->data = buffer;
+	rq->data_len = hdr.dxfer_len;
+	rq->flags = REQ_BLOCK_PC;
+	memset(rq->cmd, 0, sizeof(rq->cmd));
+	copy_from_user(rq->cmd, hdr.cmdp, hdr.cmd_len);
+	err = blk_do_rq(q, rq);
+
+	blk_put_request(rq);
+
+	copy_to_user(uptr, &hdr, sizeof(*uptr));
+	if (buffer) {
+		if (hdr.dxfer_direction == SG_DXFER_FROM_DEV ||
+		    hdr.dxfer_direction == SG_DXFER_TO_FROM_DEV)
+			copy_to_user(hdr.dxferp, buffer, hdr.dxfer_len);
+		kfree(buffer);
+	}
+	return err;
+}
+
+int scsi_cmd_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg)
+{
+	request_queue_t *q;
+	struct request *rq;
+	int close = 0, err;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	switch (cmd) {
+		case SG_GET_VERSION_NUM:
+			return sg_get_version((int *) arg);
+		case SCSI_IOCTL_GET_IDLUN:
+			return scsi_get_idlun(q, (int *) arg);
+		case SCSI_IOCTL_GET_BUS_NUMBER:
+			return scsi_get_bus(q, (int *) arg);
+		case SG_SET_TIMEOUT:
+			return sg_set_timeout(q, (int *) arg);
+		case SG_GET_TIMEOUT:
+			return sg_get_timeout(q);
+		case SG_GET_RESERVED_SIZE:
+			return sg_get_reserved_size(q, (int *) arg);
+		case SG_SET_RESERVED_SIZE:
+			return sg_set_reserved_size(q, (int *) arg);
+		case SG_EMULATED_HOST:
+			return sg_emulated_host(q, (int *) arg);
+		case SG_IO:
+			return sg_io(q, (struct sg_io_hdr *) arg);
+		case CDROMCLOSETRAY:
+			close = 1;
+		case CDROMEJECT:
+			rq = blk_get_request(q, WRITE, __GFP_WAIT);
+			rq->flags = REQ_BLOCK_PC;
+			rq->data = NULL;
+			rq->data_len = 0;
+			rq->timeout = 60*HZ;
+			memset(rq->cmd, 0, sizeof(rq->cmd));
+			rq->cmd[0] = GPCMD_START_STOP_UNIT;
+			rq->cmd[4] = 0x02 + (close != 0);
+			err = blk_do_rq(q, rq);
+			blk_put_request(rq);
+			break;
+		default:
+			err = -ENOTTY;
+	}
+
+	blk_put_queue(q);
+	return err;
+}
+
+EXPORT_SYMBOL(scsi_cmd_ioctl);
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index e2380bcb9fe8..00830680bb42 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -2639,7 +2639,7 @@ static int ide_ioctl (struct inode *inode, struct file *file,
 
 		case CDROMEJECT:
 		case CDROMCLOSETRAY:
-			return block_ioctl(inode->i_bdev, cmd, arg);
+			return scsi_cmd_ioctl(inode->i_bdev, cmd, arg);
 
 		case HDIO_GET_BUSSTATE:
 			if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3b95ff2d40a4..7a3f43f1b186 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -825,44 +825,9 @@ static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
 			unsigned long arg)
 {
 	struct block_device *bdev = inode->i_bdev;
-	int ret = -EINVAL;
-	switch (cmd) {
-	/*
-	 * deprecated, use the /proc/iosched interface instead
-	 */
-	case BLKELVGET:
-	case BLKELVSET:
-		ret = -ENOTTY;
-		break;
-	case BLKRAGET:
-	case BLKROGET:
-	case BLKBSZGET:
-	case BLKSSZGET:
-	case BLKFRAGET:
-	case BLKSECTGET:
-	case BLKRASET:
-	case BLKFRASET:
-	case BLKBSZSET:
-	case BLKPG:
-		ret = blk_ioctl(bdev, cmd, arg);
-		break;
-	case BLKRRPART:
-		ret = blkdev_reread_part(bdev);
-		break;
-	default:
-		if (bdev->bd_op->ioctl)
-			ret =bdev->bd_op->ioctl(inode, file, cmd, arg);
-		if (ret == -EINVAL) {
-			switch (cmd) {
-				case BLKGETSIZE:
-				case BLKGETSIZE64:
-				case BLKFLSBUF:
-				case BLKROSET:
-					ret = blk_ioctl(bdev,cmd,arg);
-					break;
-			}
-		}
-	}
+	int ret = blk_ioctl(bdev, cmd, arg);
+	if (ret == -ENOTTY && bdev->bd_op->ioctl)
+		ret = bdev->bd_op->ioctl(inode, file, cmd, arg);
 	return ret;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4929d743683d..607641c6cfb1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -60,6 +60,12 @@ struct request {
 	int tag;
 	void *special;
 	char *buffer;
+
+	/* For packet commands */
+	unsigned int data_len;
+	void *data, *sense;
+
+	unsigned int timeout;
 	struct completion *waiting;
 	struct bio *bio, *biotail;
 	request_queue_t *q;
@@ -85,6 +91,8 @@ enum rq_flag_bits {
 	__REQ_BLOCK_PC,	/* queued down pc from block layer */
 	__REQ_SENSE,	/* sense retrival */
 
+	__REQ_FAILED,	/* set if the request failed */
+	__REQ_QUIET,	/* don't worry about errors */
 	__REQ_SPECIAL,	/* driver suplied command */
 	__REQ_DRIVE_CMD,
 	__REQ_DRIVE_TASK,
@@ -103,6 +111,8 @@ enum rq_flag_bits {
 #define REQ_PC		(1 << __REQ_PC)
 #define REQ_BLOCK_PC	(1 << __REQ_BLOCK_PC)
 #define REQ_SENSE	(1 << __REQ_SENSE)
+#define REQ_FAILED	(1 << __REQ_FAILED)
+#define REQ_QUIET	(1 << __REQ_QUIET)
 #define REQ_SPECIAL	(1 << __REQ_SPECIAL)
 #define REQ_DRIVE_CMD	(1 << __REQ_DRIVE_CMD)
 #define REQ_DRIVE_TASK	(1 << __REQ_DRIVE_TASK)
@@ -301,7 +311,7 @@ extern int blk_remove_plug(request_queue_t *);
 extern void blk_recount_segments(request_queue_t *, struct bio *);
 extern inline int blk_phys_contig_segment(request_queue_t *q, struct bio *, struct bio *);
 extern inline int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *);
-extern int block_ioctl(struct block_device *, unsigned int, unsigned long);
+extern int scsi_cmd_ioctl(struct block_device *, unsigned int, unsigned long);
 extern void blk_start_queue(request_queue_t *q);
 extern void blk_stop_queue(request_queue_t *q);
 extern void __blk_stop_queue(request_queue_t *q);
-- 
cgit v1.2.3


From d5e567ce2fa25f6867128b590b016d97146686e8 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@shaggy.austin.ibm.com>
Date: Tue, 15 Oct 2002 00:43:56 -0500
Subject: JFS: return code from sb_bread was incorrectly checked

---
 fs/jfs/jfs_mount.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 223d700da927..7859b2f22d28 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -478,12 +478,12 @@ int readSuper(struct super_block *sb, struct buffer_head **bpp)
 {
 	/* read in primary superblock */
 	*bpp = sb_bread(sb, SUPER1_OFF >> sb->s_blocksize_bits);
-	if (bpp)
+	if (*bpp)
 		return 0;
 
 	/* read in secondary/replicated superblock */
 	*bpp = sb_bread(sb, SUPER2_OFF >> sb->s_blocksize_bits);
-	if (bpp)
+	if (*bpp)
 		return 0;
 
 	return -EIO;
-- 
cgit v1.2.3


From b8ed178862df2381b14b12c9b3c4f7f39053c5e5 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:23:25 -0700
Subject: [PATCH] early allocation of ->part

allocation of ->part[] moved to alloc_disk(); alloc_disk() got an
argument (number of minors expected).  Freeing is in put_disk().
---
 arch/m68k/atari/stram.c         |  3 +--
 arch/um/drivers/ubd_kern.c      |  6 ++---
 drivers/acorn/block/fd1772.c    |  2 +-
 drivers/acorn/block/mfmhd.c     |  3 +--
 drivers/block/DAC960.c          |  3 +--
 drivers/block/acsi.c            |  2 +-
 drivers/block/amiflop.c         |  3 +--
 drivers/block/ataflop.c         |  2 +-
 drivers/block/cciss.c           |  9 ++++---
 drivers/block/cpqarray.c        |  7 +++---
 drivers/block/floppy.c          |  2 +-
 drivers/block/genhd.c           | 52 ++++++++++++++++++++---------------------
 drivers/block/loop.c            |  2 +-
 drivers/block/nbd.c             |  3 +--
 drivers/block/paride/pcd.c      |  3 +--
 drivers/block/paride/pd.c       |  3 +--
 drivers/block/paride/pf.c       |  3 +--
 drivers/block/ps2esdi.c         |  3 +--
 drivers/block/rd.c              |  6 ++---
 drivers/block/swim3.c           |  2 +-
 drivers/block/swim_iop.c        |  2 +-
 drivers/block/umem.c            |  3 +--
 drivers/block/xd.c              |  3 +--
 drivers/block/z2ram.c           |  3 +--
 drivers/cdrom/aztcd.c           |  3 +--
 drivers/cdrom/cdu31a.c          |  3 +--
 drivers/cdrom/cm206.c           |  3 +--
 drivers/cdrom/gscd.c            |  3 +--
 drivers/cdrom/mcd.c             |  3 +--
 drivers/cdrom/mcdx.c            |  3 +--
 drivers/cdrom/optcd.c           |  3 +--
 drivers/cdrom/sbpcd.c           |  3 +--
 drivers/cdrom/sjcd.c            |  3 +--
 drivers/cdrom/sonycd535.c       |  3 +--
 drivers/ide/ide-probe.c         |  3 +--
 drivers/ide/legacy/hd.c         |  3 +--
 drivers/md/md.c                 |  3 +--
 drivers/message/i2o/i2o_block.c |  3 +--
 drivers/mtd/ftl.c               |  3 +--
 drivers/mtd/mtdblock.c          |  3 +--
 drivers/mtd/mtdblock_ro.c       |  2 +-
 drivers/mtd/nftlcore.c          |  3 +--
 drivers/s390/block/dasd_genhd.c |  3 +--
 drivers/s390/block/xpram.c      |  3 +--
 drivers/sbus/char/jsflash.c     |  3 +--
 drivers/scsi/sd.c               |  3 +--
 drivers/scsi/sr.c               |  3 +--
 fs/partitions/check.c           |  5 +---
 include/linux/genhd.h           |  3 ++-
 49 files changed, 81 insertions(+), 125 deletions(-)

(limited to 'fs')

diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c
index f40e6f70df3b..5d6eac53280a 100644
--- a/arch/m68k/atari/stram.c
+++ b/arch/m68k/atari/stram.c
@@ -1057,7 +1057,7 @@ int __init stram_device_init(void)
 	if (!max_swap_size)
 		/* swapping not enabled */
 		return -ENXIO;
-	stram_disk = alloc_disk();
+	stram_disk = alloc_disk(1);
 	if (!stram_disk)
 		return -ENOMEM;
 
@@ -1070,7 +1070,6 @@ int __init stram_device_init(void)
 	blk_init_queue(BLK_DEFAULT_QUEUE(STRAM_MAJOR), do_stram_request);
 	stram_disk->major = STRAM_MAJOR;
 	stram_disk->first_minor = STRAM_MINOR;
-	stram_disk->minor_shift = 0;
 	stram_disk->fops = &stram_fops;
 	sprintf(stram_disk->disk_name, "stram");
 	set_capacity(stram_disk, (swap_end - swap_start)/512);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 36995c3f84f6..9229a26c1d16 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -404,12 +404,11 @@ static int ubd_add(int n)
 	if (!dev->file)
 		return -1;
 
-	disk = alloc_disk();
+	disk = alloc_disk(1 << UBD_SHIFT);
 	if (!disk)
 		return -1;
 	disk->major = MAJOR_NR;
 	disk->first_minor = n << UBD_SHIFT;
-	disk->minor_shift = UBD_SHIFT;
 	disk->fops = &ubd_blops;
 	if (fakehd_set)
 		sprintf(disk->disk_name, "hd%c", n + 'a');
@@ -417,14 +416,13 @@ static int ubd_add(int n)
 		sprintf(disk->disk_name, "ubd%d", n);
 
 	if (fake_major) {
-		fake_disk = alloc_disk();
+		fake_disk = alloc_disk(1 << UBD_SHIFT);
 		if (!fake_disk) {
 			put_disk(disk);
 			return -1;
 		}
 		fake_disk->major = fake_major;
 		fake_disk->first_minor = n << UBD_SHIFT;
-		fake_disk->minor_shift = UBD_SHIFT;
 		fake_disk->fops = &ubd_blops;
 		sprintf(fake_disk->disk_name, "ubd%d", n);
 		fake_gendisk[n] = fake_disk;
diff --git a/drivers/acorn/block/fd1772.c b/drivers/acorn/block/fd1772.c
index 85b5b9cd8859..1285b8388421 100644
--- a/drivers/acorn/block/fd1772.c
+++ b/drivers/acorn/block/fd1772.c
@@ -1547,7 +1547,7 @@ int fd1772_init(void)
 		return 0;
 
 	for (i = 0; i < FD_MAX_UNITS; i++) {
-		disks[i] = alloc_disk();
+		disks[i] = alloc_disk(1);
 		if (!disks[i])
 			goto out;
 	}
diff --git a/drivers/acorn/block/mfmhd.c b/drivers/acorn/block/mfmhd.c
index 32bef8806190..b29cb6ad5505 100644
--- a/drivers/acorn/block/mfmhd.c
+++ b/drivers/acorn/block/mfmhd.c
@@ -1336,12 +1336,11 @@ static int __init mfm_init (void)
 		goto out3;
 	
 	for (i = 0; i < mfm_drives; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(64);
 		if (!disk)
 			goto Enomem;
 		disk->major = MAJOR_NR;
 		disk->first_minor = i << 6;
-		disk->minor_shift = 6;
 		disk->fops = &mfm_fops;
 		sprintf(disk->disk_name, "mfm%c", 'a'+i);
 		mfm_gendisk[i] = disk;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 24a1ee66d93b..1c1a72e440e6 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1962,7 +1962,6 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
 	sprintf(disk->disk_name, "rd/c%dd%d", Controller->ControllerNumber, n);
 	disk->major = MajorNumber;
 	disk->first_minor = n << DAC960_MaxPartitionsBits;
-	disk->minor_shift = DAC960_MaxPartitionsBits;
 	disk->fops = &DAC960_BlockDeviceOperations;
    }
   /*
@@ -2200,7 +2199,7 @@ static void DAC960_DetectControllers(DAC960_HardwareType_T HardwareType)
 	}
       memset(Controller, 0, sizeof(DAC960_Controller_T));
       for (i = 0; i < DAC960_MaxLogicalDrives; i++) {
-		Controller->disks[i] = alloc_disk();
+		Controller->disks[i] = alloc_disk(1<<DAC960_MaxPartitionsBits);
 		if (!Controller->disks[i])
 			goto Enomem;
       }
diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c
index 5d36adb832e2..006ff8b23e32 100644
--- a/drivers/block/acsi.c
+++ b/drivers/block/acsi.c
@@ -1729,7 +1729,7 @@ int acsi_init( void )
 #endif
 	err = -ENOMEM;
 	for( i = 0; i < NDevices; ++i ) {
-		acsi_gendisk[i] = alloc_disk();
+		acsi_gendisk[i] = alloc_disk(16);
 		if (!acsi_gendisk[i])
 			goto out4;
 	}
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 22790c4145fe..df7d1ac10fe9 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1735,7 +1735,7 @@ static int __init fd_probe_drives(void)
 		fd_probe(drive);
 		if (unit[drive].type->code == FD_NODRIVE)
 			continue;
-		disk = alloc_disk();
+		disk = alloc_disk(1);
 		if (!disk) {
 			unit[drive].type->code = FD_NODRIVE;
 			continue;
@@ -1751,7 +1751,6 @@ static int __init fd_probe_drives(void)
 		printk("fd%d ",drive);
 		disk->major = MAJOR_NR;
 		disk->first_minor = drive;
-		disk->minor_shift = 0;
 		disk->fops = &floppy_fops;
 		sprintf(disk->disk_name, "fd%d", drive);
 		set_capacity(disk, 880*2);
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 12f3ae02b317..eac85e42887f 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1949,7 +1949,7 @@ int __init atari_floppy_init (void)
 	}
 
 	for (i = 0; i < FD_MAX_UNITS; i++) {
-		unit[i].disk = alloc_disk();
+		unit[i].disk = alloc_disk(1);
 		if (!unit[i].disk)
 			goto Enomem;
 	}
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ebd7a216810f..caa30e1c6e84 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -740,7 +740,7 @@ static int revalidate_allvol(kdev_t dev)
 
 	for(i=0; i< NWD; i++) {
 		struct gendisk *disk = hba[ctlr]->gendisk[i];
-		if (disk->part)
+		if (disk->flags & GENHD_FL_UP)
 			del_gendisk(disk);
 	}
 
@@ -792,7 +792,7 @@ static int deregister_disk(int ctlr, int logvol)
 	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
 
 	/* invalidate the devices and deregister the disk */ 
-	if (disk->part)
+	if (disk->flags & GENHD_FL_UP)
 		del_gendisk(disk);
 	/* check to see if it was the last disk */
 	if (logvol == h->highest_lun) {
@@ -2274,7 +2274,7 @@ static int alloc_cciss_hba(void)
 	struct gendisk *disk[NWD];
 	int i, n;
 	for (n = 0; n < NWD; n++) {
-		disk[n] = alloc_disk();
+		disk[n] = alloc_disk(1 << NWD_SHIFT);
 		if (!disk[n])
 			goto out;
 	}
@@ -2447,7 +2447,6 @@ static int __init cciss_init_one(struct pci_dev *pdev,
 		sprintf(disk->disk_name, "cciss/c%dd%d", i, j);
 		disk->major = MAJOR_NR + i;
 		disk->first_minor = j << NWD_SHIFT;
-		disk->minor_shift = NWD_SHIFT;
 		if( !(drv->nr_blocks))
 			continue;
 		(BLK_DEFAULT_QUEUE(MAJOR_NR + i))->hardsect_size = drv->block_size;
@@ -2500,7 +2499,7 @@ static void __devexit cciss_remove_one (struct pci_dev *pdev)
 	/* remove it from the disk list */
 	for (j = 0; j < NWD; j++) {
 		struct gendisk *disk = hba[i]->gendisk[j];
-		if (disk->part)
+		if (disk->flags & GENHD_FL_UP)
 			del_gendisk(disk);
 	}
 
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 7bfa29a5bc89..c3b1c4b17ea7 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -304,7 +304,7 @@ static void __exit cpqarray_exit(void)
 		kfree(hba[i]->cmd_pool_bits);
 
 		for (j = 0; j < NWD; j++) {
-			if (ida_gendisk[i][j]->part)
+			if (ida_gendisk[i][j]->flags & GENHD_FL_UP)
 				del_gendisk(ida_gendisk[i][j]);
 			put_disk(ida_gendisk[i][j]);
 		}
@@ -358,7 +358,7 @@ static int __init cpqarray_init(void)
 		}
 		num_cntlrs_reg++;
 		for (j=0; j<NWD; j++) {
-			ida_gendisk[i][j] = alloc_disk();
+			ida_gendisk[i][j] = alloc_disk(1 << NWD_SHIFT);
 			if (!ida_gendisk[i][j])
 				goto Enomem2;
 		}
@@ -405,7 +405,6 @@ static int __init cpqarray_init(void)
 			sprintf(disk->disk_name, "ida/c%dd%d", i, j);
 			disk->major = MAJOR_NR + i;
 			disk->first_minor = j<<NWD_SHIFT;
-			disk->minor_shift = NWD_SHIFT;
 			disk->flags = GENHD_FL_DEVFS;
 			disk->fops = &ida_fops; 
 			if (!drv->nr_blks)
@@ -1428,7 +1427,7 @@ static int revalidate_allvol(kdev_t dev)
 	 */
 	for (i = 0; i < NWD; i++) {
 		struct gendisk *disk = ida_gendisk[ctlr][i];
-		if (disk->part)
+		if (disk->flags & GENHD_FL_UP)
 			del_gendisk(disk);
 	}
 	memset(hba[ctlr]->drv,            0, sizeof(drv_info_t)*NWD);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 924e1e011f76..8783ee17314d 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4240,7 +4240,7 @@ int __init floppy_init(void)
 	raw_cmd = NULL;
 
 	for (i=0; i<N_DRIVE; i++) {
-		disks[i] = alloc_disk();
+		disks[i] = alloc_disk(1);
 		if (!disks[i])
 			goto Enomem;
 	}
diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index 3f6d259165f3..b230df7f7b70 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -57,34 +57,16 @@ EXPORT_SYMBOL(blk_set_probe);	/* Will go away */
  * This function registers the partitioning information in @gp
  * with the kernel.
  */
-static void add_gendisk(struct gendisk *gp)
+void add_disk(struct gendisk *disk)
 {
-	struct hd_struct *p = NULL;
-
-	if (gp->minor_shift) {
-		size_t size = sizeof(struct hd_struct)*((1<<gp->minor_shift)-1);
-		p = kmalloc(size, GFP_KERNEL);
-		if (!p) {
-			printk(KERN_ERR "out of memory; no partitions for %s\n",
-				gp->disk_name);
-			gp->minor_shift = 0;
-		} else
-			memset(p, 0, size);
-	}
-	gp->part = p;
-
 	write_lock(&gendisk_lock);
-	list_add(&gp->list, &gendisks[gp->major].list);
-	if (gp->minor_shift)
-		list_add_tail(&gp->full_list, &gendisk_list);
+	list_add(&disk->list, &gendisks[disk->major].list);
+	if (disk->minor_shift)
+		list_add_tail(&disk->full_list, &gendisk_list);
 	else
-		INIT_LIST_HEAD(&gp->full_list);
+		INIT_LIST_HEAD(&disk->full_list);
 	write_unlock(&gendisk_lock);
-}
-
-void add_disk(struct gendisk *disk)
-{
-	add_gendisk(disk);
+	disk->flags |= GENHD_FL_UP;
 	register_disk(disk);
 }
 
@@ -225,17 +207,33 @@ __initcall(device_init);
 
 EXPORT_SYMBOL(disk_devclass);
 
-struct gendisk *alloc_disk(void)
+struct gendisk *alloc_disk(int minors)
 {
 	struct gendisk *disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
-	if (disk)
+	if (disk) {
 		memset(disk, 0, sizeof(struct gendisk));
+		if (minors > 1) {
+			int size = (minors - 1) * sizeof(struct hd_struct);
+			disk->part = kmalloc(size, GFP_KERNEL);
+			if (!disk->part) {
+				kfree(disk);
+				return NULL;
+			}
+			memset(disk->part, 0, size);
+		}
+		disk->minors = minors;
+		while (minors >>= 1)
+			disk->minor_shift++;
+	}
 	return disk;
 }
 
 void put_disk(struct gendisk *disk)
 {
-	kfree(disk);
+	if (disk) {
+		kfree(disk->part);
+		kfree(disk);
+	}
 }
 EXPORT_SYMBOL(alloc_disk);
 EXPORT_SYMBOL(put_disk);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e39755017faf..14fa8720f8db 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1075,7 +1075,7 @@ int __init loop_init(void)
 		goto out_mem;
 
 	for (i = 0; i < max_loop; i++) {
-		disks[i] = alloc_disk();
+		disks[i] = alloc_disk(1);
 		if (!disks[i])
 			goto out_mem2;
 	}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index be27027d32b8..27726bd0246a 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -507,7 +507,7 @@ static int __init nbd_init(void)
 	}
 
 	for (i = 0; i < MAX_NBD; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			goto out;
 		nbd_dev[i].disk = disk;
@@ -537,7 +537,6 @@ static int __init nbd_init(void)
 		nbd_bytesizes[i] = 0x7ffffc00; /* 2GB */
 		disk->major = MAJOR_NR;
 		disk->first_minor = i;
-		disk->minor_shift = 0;
 		disk->fops = &nbd_fops;
 		sprintf(disk->disk_name, "nbd%d", i);
 		set_capacity(disk, 0x3ffffe);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 0e4bac2bd1ef..95bedb2a580c 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -281,7 +281,7 @@ static void pcd_init_units(void)
 
 	pcd_drive_count = 0;
 	for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			continue;
 		cd->disk = disk;
@@ -303,7 +303,6 @@ static void pcd_init_units(void)
 		cd->info.mask = 0;
 		disk->major = major;
 		disk->first_minor = unit;
-		disk->minor_shift = 0;
 		strcpy(disk->disk_name, cd->name);	/* umm... */
 		disk->fops = &pcd_bdops;
 	}
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 7fdf4a3e4b2a..3a3ad6390118 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -703,14 +703,13 @@ static int pd_detect(void)
 	}
 	for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) {
 		if (disk->present) {
-			struct gendisk *p = alloc_disk();
+			struct gendisk *p = alloc_disk(1 << PD_BITS);
 			if (!p) {
 				disk->present = 0;
 				k--;
 				continue;
 			}
 			strcpy(p->disk_name, disk->name);
-			p->minor_shift = PD_BITS;
 			p->fops = &pd_fops;
 			p->major = major;
 			p->first_minor = unit << PD_BITS;
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index becf37efd5ec..69a2c8e23eae 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -308,7 +308,7 @@ void pf_init_units(void)
 
 	pf_drive_count = 0;
 	for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			continue;
 		pf->disk = disk;
@@ -320,7 +320,6 @@ void pf_init_units(void)
 		disk->major = MAJOR_NR;
 		disk->first_minor = unit;
 		strcpy(disk->disk_name, pf->name);
-		disk->minor_shift = 0;
 		disk->fops = &pf_fops;
 		if (!(*drives[unit])[D_PRT])
 			pf_drive_count++;
diff --git a/drivers/block/ps2esdi.c b/drivers/block/ps2esdi.c
index 770fbfd4613f..ed022ff34e3e 100644
--- a/drivers/block/ps2esdi.c
+++ b/drivers/block/ps2esdi.c
@@ -421,13 +421,12 @@ static int __init ps2esdi_geninit(void)
 
 	error = -ENOMEM;
 	for (i = 0; i < ps2esdi_drives; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(64);
 		if (!disk)
 			goto err_out4;
 		disk->major = MAJOR_NR;
 		disk->first_minor = i<<6;
 		sprintf(disk->disk_name, "ed%c", 'a'+i);
-		disk->minor_shift = 6;
 		disk->fops = &ps2esdi_fops;
 		ps2esdi_gendisk[i] = disk;
 	}
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index a0e60c5972a6..391664b9a34f 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -431,17 +431,16 @@ static int __init rd_init (void)
 	}
 
 #ifdef CONFIG_BLK_DEV_INITRD
-	initrd_disk = alloc_disk();
+	initrd_disk = alloc_disk(1);
 	if (!initrd_disk)
 		return -ENOMEM;
 	initrd_disk->major = MAJOR_NR;
 	initrd_disk->first_minor = INITRD_MINOR;
-	initrd_disk->minor_shift = 0;
 	initrd_disk->fops = &rd_bd_op;	
 	sprintf(initrd_disk->disk_name, "initrd");
 #endif
 	for (i = 0; i < NUM_RAMDISKS; i++) {
-		rd_disks[i] = alloc_disk();
+		rd_disks[i] = alloc_disk(1);
 		if (!rd_disks[i])
 			goto out;
 	}
@@ -460,7 +459,6 @@ static int __init rd_init (void)
 		rd_length[i] = rd_size << 10;
 		disk->major = MAJOR_NR;
 		disk->first_minor = i;
-		disk->minor_shift = 0;
 		disk->fops = &rd_bd_op;
 		sprintf(disk->disk_name, "rd%d", i);
 		set_capacity(disk, rd_size * 2);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index b1cb36f3ca5c..2a5f3afefbfa 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1037,7 +1037,7 @@ int swim3_init(void)
 		return -ENODEV;
 
 	for (i = 0; i < floppy_count; i++) {
-		disks[i] = alloc_disk();
+		disks[i] = alloc_disk(1);
 		if (!disks[i])
 			goto out;
 	}
diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c
index 29c2f1696063..3ec747c3f80f 100644
--- a/drivers/block/swim_iop.c
+++ b/drivers/block/swim_iop.c
@@ -188,7 +188,7 @@ int swimiop_init(void)
 	printk("SWIM-IOP: detected %d installed drives.\n", floppy_count);
 
 	for (i = 0; i < floppy_count; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			continue;
 		disk->major = MAJOR_NR;
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 53dfd2a7c624..3c6a3b8294b8 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -1190,7 +1190,7 @@ int __init mm_init(void)
 	}
 
 	for (i = 0; i < num_cards; i++) {
-		mm_gendisk[i] = alloc_disk();
+		mm_gendisk[i] = alloc_disk(1 << MM_SHIFT);
 		if (!mm_gendisk[i])
 			goto out;
 	}
@@ -1203,7 +1203,6 @@ int __init mm_init(void)
 		spin_lock_init(&cards[i].lock);
 		disk->major = major_nr;
 		disk->first_minor  = i << MM_SHIFT;
-		disk->minor_shift = MM_SHIFT;
 		disk->fops = &mm_fops;
 		set_capacity(disk, cards[i].mm_size << 1);
 		add_disk(disk);
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 3e3315e81bde..4467ba777d60 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -205,12 +205,11 @@ static int __init xd_init(void)
 		goto out3;
 
 	for (i = 0; i < xd_drives; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(64);
 		if (!disk)
 			goto Enomem;
 		disk->major = MAJOR_NR;
 		disk->first_minor = i<<6;
-		disk->minor_shift = 6;
 		sprintf(disk->disk_name, "xd%c", i+'a');
 		disk->fops = &xd_fops;
 		xd_gendisk[i] = disk;
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 30625811de3e..edb2676680e3 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -365,14 +365,13 @@ z2_init( void )
 	    MAJOR_NR );
 	return -EBUSY;
     }
-    z2ram_gendisk = alloc_disk();
+    z2ram_gendisk = alloc_disk(1);
     if (!z2ram_gendisk) {
 	unregister_blkdev( MAJOR_NR, DEVICE_NAME );
 	return -ENOMEM;
     }
     z2ram_gendisk->major = MAJOR_NR;
     z2ram_gendisk->first_minor = 0;
-    z2ram_gendisk->minor_shift = 0;
     z2ram_gendisk->fops = &z2_fops;
     sprintf(z2ram_gendisk->disk_name, "z2ram");
 
diff --git a/drivers/cdrom/aztcd.c b/drivers/cdrom/aztcd.c
index 53f8fe2bafe2..b8e1880d8714 100644
--- a/drivers/cdrom/aztcd.c
+++ b/drivers/cdrom/aztcd.c
@@ -1908,7 +1908,7 @@ static int __init aztcd_init(void)
 	}
 	devfs_register(NULL, "aztcd", DEVFS_FL_DEFAULT, MAJOR_NR, 0,
 		       S_IFBLK | S_IRUGO | S_IWUGO, &azt_fops, NULL);
-	azt_disk = alloc_disk();
+	azt_disk = alloc_disk(1);
 	if (!azt_disk)
 		goto err_out2;
 	if (register_blkdev(MAJOR_NR, "aztcd", &azt_fops) != 0) {
@@ -1921,7 +1921,6 @@ static int __init aztcd_init(void)
 	blk_queue_hardsect_size(BLK_DEFAULT_QUEUE(MAJOR_NR), 2048);
 	azt_disk->major = MAJOR_NR;
 	azt_disk->first_minor = 0;
-	azt_disk->minor_shift = 0;
 	azt_disk->fops = &azt_fops;
 	sprintf(azt_disk->disk_name, "aztcd");
 	add_disk(azt_disk);
diff --git a/drivers/cdrom/cdu31a.c b/drivers/cdrom/cdu31a.c
index 8863cb1254de..f4077094707a 100644
--- a/drivers/cdrom/cdu31a.c
+++ b/drivers/cdrom/cdu31a.c
@@ -3366,12 +3366,11 @@ int __init cdu31a_init(void)
 		goto errout2;
 	}
 
-	disk = alloc_disk();
+	disk = alloc_disk(1);
 	if (!disk)
 		goto errout1;
 	disk->major = MAJOR_NR;
 	disk->first_minor = 0;
-	disk->minor_shift = 0;
 	sprintf(disk->disk_name, "cdu31a");
 	disk->fops = &scd_bdops;
 	disk->flags = GENHD_FL_CD;
diff --git a/drivers/cdrom/cm206.c b/drivers/cdrom/cm206.c
index 0da8b3bcdf30..8a83a381bcc1 100644
--- a/drivers/cdrom/cm206.c
+++ b/drivers/cdrom/cm206.c
@@ -1470,12 +1470,11 @@ int __init cm206_init(void)
 		printk(KERN_INFO "Cannot register for major %d!\n", MAJOR_NR);
 		goto out_blkdev;
 	}
-	disk = alloc_disk();
+	disk = alloc_disk(1);
 	if (!disk)
 		goto out_disk;
 	disk->major = MAJOR_NR;
 	disk->first_minor = 0;
-	disk->minor_shift = 0;
 	sprintf(disk->disk_name, "cm206");
 	disk->fops = &cm206_bdops;
 	disk->flags = GENHD_FL_CD;
diff --git a/drivers/cdrom/gscd.c b/drivers/cdrom/gscd.c
index 9e8a14ce9374..d82b99f5a4b5 100644
--- a/drivers/cdrom/gscd.c
+++ b/drivers/cdrom/gscd.c
@@ -972,12 +972,11 @@ static int __init gscd_init(void)
 		i++;
 	}
 
-	gscd_disk = alloc_disk();
+	gscd_disk = alloc_disk(1);
 	if (!gscd_disk)
 		goto err_out1;
 	gscd_disk->major = MAJOR_NR;
 	gscd_disk->first_minor = 0;
-	gscd_disk->minor_shift = 0;
 	gscd_disk->fops = &gscd_fops;
 	sprintf(gscd_disk->disk_name, "gscd");
 
diff --git a/drivers/cdrom/mcd.c b/drivers/cdrom/mcd.c
index 39eff9436cbf..e6c72eabda52 100644
--- a/drivers/cdrom/mcd.c
+++ b/drivers/cdrom/mcd.c
@@ -1031,7 +1031,7 @@ static void mcd_release(struct cdrom_device_info *cdi)
 
 int __init mcd_init(void)
 {
-	struct gendisk *disk = alloc_disk();
+	struct gendisk *disk = alloc_disk(1);
 	int count;
 	unsigned char result[3];
 	char msg[80];
@@ -1124,7 +1124,6 @@ int __init mcd_init(void)
 
 	disk->major = MAJOR_NR;
 	disk->first_minor = 0;
-	disk->minor_shift = 0;
 	sprintf(disk->disk_name, "mcd");
 	disk->fops = &mcd_bdops;
 	disk->flags = GENHD_FL_CD;
diff --git a/drivers/cdrom/mcdx.c b/drivers/cdrom/mcdx.c
index 7b6aaace0be1..9747c15b926b 100644
--- a/drivers/cdrom/mcdx.c
+++ b/drivers/cdrom/mcdx.c
@@ -1076,7 +1076,7 @@ int __init mcdx_init_drive(int drive)
 		return 1;
 	}
 
-	disk = alloc_disk();
+	disk = alloc_disk(1);
 	if (!disk) {
 		xwarn("init() malloc failed\n");
 		kfree(stuffp);
@@ -1221,7 +1221,6 @@ int __init mcdx_init_drive(int drive)
 	stuffp->info.dev = mk_kdev(MAJOR_NR, drive);
 	disk->major = MAJOR_NR;
 	disk->first_minor = drive;
-	disk->minor_shift = 0;
 	strcpy(disk->disk_name, stuffp->info.name);
 	disk->fops = &mcdx_bdops;
 	disk->flags = GENHD_FL_CD;
diff --git a/drivers/cdrom/optcd.c b/drivers/cdrom/optcd.c
index baf39fd6f708..6abce539684e 100644
--- a/drivers/cdrom/optcd.c
+++ b/drivers/cdrom/optcd.c
@@ -2010,14 +2010,13 @@ static int __init optcd_init(void)
 			"optcd: no Optics Storage CDROM Initialization\n");
 		return -EIO;
 	}
-	optcd_disk = alloc_disk();
+	optcd_disk = alloc_disk(1);
 	if (!optcd_disk) {
 		printk(KERN_ERR "optcd: can't allocate disk\n");
 		return -ENOMEM;
 	}
 	optcd_disk->major = MAJOR_NR;
 	optcd_disk->first_minor = 0;
-	optcd_disk->minor_shift = 0;
 	optcd_disk->fops = &opt_fops;
 	sprintf(optcd_disk->disk_name, "optcd");
 	if (!request_region(optcd_port, 4, "optcd")) {
diff --git a/drivers/cdrom/sbpcd.c b/drivers/cdrom/sbpcd.c
index 409aea0c4f0f..22a4ca708c6f 100644
--- a/drivers/cdrom/sbpcd.c
+++ b/drivers/cdrom/sbpcd.c
@@ -5831,10 +5831,9 @@ int __init sbpcd_init(void)
 		sbpcd_infop->dev = mk_kdev(MAJOR_NR, j);
 		sbpcd_infop->handle = p;
 		p->sbpcd_infop = sbpcd_infop;
-		disk = alloc_disk();
+		disk = alloc_disk(1);
 		disk->major = MAJOR_NR;
 		disk->first_minor = j;
-		disk->minor_shift = 0;
 		disk->fops = &sbpcd_bdops;
 		strcpy(disk->disk_name, sbpcd_infop->name);
 		disk->flags = GENHD_FL_CD;
diff --git a/drivers/cdrom/sjcd.c b/drivers/cdrom/sjcd.c
index c04647548625..9dcdda8741b0 100644
--- a/drivers/cdrom/sjcd.c
+++ b/drivers/cdrom/sjcd.c
@@ -1689,14 +1689,13 @@ static int __init sjcd_init(void)
 	blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_sjcd_request, &sjcd_lock);
 	blk_queue_hardsect_size(BLK_DEFAULT_QUEUE(MAJOR_NR), 2048);
 
-	sjcd_disk = alloc_disk();
+	sjcd_disk = alloc_disk(1);
 	if (!sjcd_disk) {
 		printk(KERN_ERR "SJCD: can't allocate disk");
 		goto out1;
 	}
 	sjcd_disk->major = MAJOR_NR,
 	sjcd_disk->first_minor = 0,
-	sjcd_disk->minor_shift = 0,
 	sjcd_disk->fops = &sjcd_fops,
 	sprintf(sjcd_disk->disk_name, "sjcd");
 
diff --git a/drivers/cdrom/sonycd535.c b/drivers/cdrom/sonycd535.c
index d73013c02bad..68e8103a7223 100644
--- a/drivers/cdrom/sonycd535.c
+++ b/drivers/cdrom/sonycd535.c
@@ -1605,12 +1605,11 @@ static int __init sony535_init(void)
 	}
 	initialized = 1;
 
-	cdu_disk = alloc_disk();
+	cdu_disk = alloc_disk(1);
 	if (!cdu_disk)
 		goto out6;
 	cdu_disk->major = MAJOR_NR;
 	cdu_disk->first_minor = 0;
-	cdu_disk->minor_shift = 0;
 	cdu_disk->fops = &cdu_fops;
 	sprintf(cdu_disk->disk_name, "cdu");
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 6277ce3cb1e0..478bffc6aed8 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -986,7 +986,7 @@ static void init_gendisk (ide_hwif_t *hwif)
 	units = MAX_DRIVES;
 
 	for (unit = 0; unit < MAX_DRIVES; unit++) {
-		disks[unit] = alloc_disk();
+		disks[unit] = alloc_disk(1 << PARTN_BITS);
 		if (!disks[unit])
 			goto err_kmalloc_gd;
 	}
@@ -996,7 +996,6 @@ static void init_gendisk (ide_hwif_t *hwif)
 		disk->major  = hwif->major;
 		disk->first_minor = unit << PARTN_BITS;
 		sprintf(disk->disk_name,"hd%c",'a'+hwif->index*MAX_DRIVES+unit);
-		disk->minor_shift = PARTN_BITS; 
 		disk->fops = ide_fops;
 		hwif->drives[unit].disk = disk;
 	}
diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c
index b0f5f104876d..7dc166b8e646 100644
--- a/drivers/ide/legacy/hd.c
+++ b/drivers/ide/legacy/hd.c
@@ -802,12 +802,11 @@ static int __init hd_init(void)
 		goto out;
 
 	for (drive=0 ; drive < NR_HD ; drive++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(64);
 		if (!disk)
 			goto Enomem;
 		disk->major = MAJOR_NR;
 		disk->first_minor = drive << 6;
-		disk->minor_shift = 6;
 		disk->fops = &hd_fops;
 		sprintf(disk->disk_name, "hd%c", 'a'+drive);
 		hd_gendisk[drive] = disk;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a40c6af55da5..205bb0fdeee0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1394,12 +1394,11 @@ static int do_md_run(mddev_t * mddev)
 #endif
 	}
 
-	disk = alloc_disk();
+	disk = alloc_disk(1);
 	if (!disk)
 		return -ENOMEM;
 	disk->major = MD_MAJOR;
 	disk->first_minor = mdidx(mddev);
-	disk->minor_shift = 0;
 	sprintf(disk->disk_name, "md%d", mdidx(mddev));
 	disk->fops = &md_fops;
 
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 0980a0b775c6..b6f8af6193f1 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -1647,7 +1647,7 @@ static int i2o_block_init(void)
 	}
 
 	for (i = 0; i < MAX_I2OB; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(16);
 		if (!disk)
 			goto oom;
 		i2o_disk[i] = disk;
@@ -1679,7 +1679,6 @@ static int i2o_block_init(void)
 		struct gendisk *disk = i2ob_disk + i;
 		disk->major = MAJOR_NR;
 		disk->first_minor = i<<4;
-		disk->minor_shift = 4;
 		disk->fops = &i2ob_fops;
 		sprintf(disk->disk_name, "i2o/hd%c", 'a' + i);
 	}
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index 341ad2252885..e40e34d3c7d6 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -1223,7 +1223,7 @@ static void ftl_notify_add(struct mtd_info *mtd)
 	}
 
 	partition = kmalloc(sizeof(partition_t), GFP_KERNEL);
-	disk = alloc_disk();
+	disk = alloc_disk(1 << PART_BITS);
 		
 	if (!partition||!disk) {
 		printk(KERN_WARNING "No memory to scan for FTL on %s\n",
@@ -1237,7 +1237,6 @@ static void ftl_notify_add(struct mtd_info *mtd)
 	sprintf(disk->disk_name, "ftl%c", 'a' + device);
 	disk->major = FTL_MAJOR;
 	disk->first_minor = device << 4;
-	disk->minor_shift = PART_BITS;
 	disk->fops = &ftl_blk_fops;
 	partition->mtd = mtd;
 	partition->disk = disk;
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 1ad148bd3364..6b32d3cfb390 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -295,7 +295,7 @@ static int mtdblock_open(struct inode *inode, struct file *file)
 	spin_unlock(&mtdblks_lock);
 
 	mtdblk = kmalloc(sizeof(struct mtdblk_dev), GFP_KERNEL);
-	disk = alloc_disk();
+	disk = alloc_disk(1);
 	if (!mtdblk || !disk)
 		goto Enomem;
 	memset(mtdblk, 0, sizeof(*mtdblk));
@@ -313,7 +313,6 @@ static int mtdblock_open(struct inode *inode, struct file *file)
 	}
 	disk->major = MAJOR_NR;
 	disk->first_minor = dev;
-	disk->minor_shift = 0;
 	disk->fops = &mtd_fops;
 	sprintf(disk->disk_name, "mtd%d", dev);
 	mtdblk->disk = disk;
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index 65b97e3a11df..97e8437a75d5 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -224,7 +224,7 @@ int __init init_mtdblock(void)
 	int i;
 
 	for (i = 0; i < MAX_MTD_DEVICES; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			goto out;
 		disk->major = MAJOR_NR;
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 60d26b10740e..155aa92a9429 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -74,7 +74,7 @@ static void NFTL_setup(struct mtd_info *mtd)
         }
 
 	nftl = kmalloc(sizeof(struct NFTLrecord), GFP_KERNEL);
-	gd = alloc_disk();
+	gd = alloc_disk(1 << NFTL_PARTN_BITS);
 	if (!nftl || !gd) {
 		kfree(nftl);
 		put_disk(gd);
@@ -132,7 +132,6 @@ static void NFTL_setup(struct mtd_info *mtd)
 	sprintf(gd->disk_name, "nftl%c", 'a' + firstfree);
 	gd->major = MAJOR_NR;
 	gd->first_minor = firstfree << NFTL_PARTN_BITS;
-	gd->minor_shift = NFTL_PARTN_BITS;
 	set_capacity(gd, nftl->nr_sects);
 	nftl->disk = gd;
 	add_disk(gd);
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index be6c7dc5aa0a..67597043b718 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -190,14 +190,13 @@ dasd_gendisk_alloc(int devindex)
 		}
 	}
 
-	gdp = alloc_disk();
+	gdp = alloc_disk(1 << DASD_PARTN_BITS);
 	if (!gdp)
 		return ERR_PTR(-ENOMEM);
 
 	/* Initialize gendisk structure. */
 	gdp->major = mi->major;
 	gdp->first_minor = index << DASD_PARTN_BITS;
-	gdp->minor_shift = DASD_PARTN_BITS;
 	gdp->fops = &dasd_device_operations;
 
 	/*
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 80f8b7573a41..4db75e1b7e73 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -441,7 +441,7 @@ static int __init xpram_setup_blkdev(void)
 	int i, rc = -ENOMEM;
 
 	for (i = 0; i < xpram_devs; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			goto out;
 		xpram_disks[i] = disk;
@@ -481,7 +481,6 @@ static int __init xpram_setup_blkdev(void)
 		offset += xpram_devices[i].size;
 		disk->major = XPRAM_MAJOR;
 		disk->first_minor = i;
-		disk->minor_shift = 0;
 		disk->fops = &xpram_devops;
 		sprintf(disk->disk_name, "slram%d", i);
 		set_capacity(disk, xpram_sizes[i] << 1);
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 16386c234938..c0479c824d05 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -622,7 +622,7 @@ static int jsfd_init(void)
 
 	err = -ENOMEM;
 	for (i = 0; i < JSF_MAX; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			goto out;
 		jsfd_disk[i] = disk;
@@ -648,7 +648,6 @@ static int jsfd_init(void)
 		disk->first_minor = i;
 		sprintf(disk->disk_name, "jsfd%d", i);
 		disk->fops = &jsfd_fops;
-		disk->minor_shift = 0;
 		set_capacity(disk, jdp->dsize >> 9);
 		add_disk(disk);
 		set_device_ro(MKDEV(JSFD_MAJOR, i), 1);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 5863cdcf9bba..1b7abd00b167 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1386,7 +1386,7 @@ static int sd_attach(Scsi_Device * sdp)
 	    ((sdp->type != TYPE_DISK) && (sdp->type != TYPE_MOD)))
 		return 0;
 
-	gd = alloc_disk();
+	gd = alloc_disk(16);
 	if (!gd)
 		return 1;
 
@@ -1423,7 +1423,6 @@ static int sd_attach(Scsi_Device * sdp)
         gd->de = sdp->de;
 	gd->major = SD_MAJOR(dsk_nr>>4);
 	gd->first_minor = (dsk_nr & 15)<<4;
-	gd->minor_shift = 4;
 	gd->fops = &sd_fops;
 	if (dsk_nr > 26)
 		sprintf(gd->disk_name, "sd%c%c",'a'+dsk_nr/26-1,'a'+dsk_nr%26);
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index ab50575b899c..05fe1b938eb4 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -757,7 +757,7 @@ void sr_finish()
 		 * with loadable modules. */
 		if (cd->disk)
 			continue;
-		disk = alloc_disk();
+		disk = alloc_disk(1);
 		if (!disk)
 			continue;
 		if (cd->disk) {
@@ -766,7 +766,6 @@ void sr_finish()
 		}
 		disk->major = MAJOR_NR;
 		disk->first_minor = i;
-		disk->minor_shift = 0;
 		strcpy(disk->disk_name, cd->cdi.name);
 		disk->fops = &sr_bdops;
 		disk->flags = GENHD_FL_CD;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 5976fa3e466f..b3164b9ca071 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -531,10 +531,7 @@ void del_gendisk(struct gendisk *disk)
 	wipe_partitions(disk);
 	unlink_gendisk(disk);
 	devfs_remove_partitions(disk);
-	if (disk->part) {
-		kfree(disk->part);
-		disk->part = NULL;
-	}
+	disk->flags &= ~GENHD_FL_UP;
 }
 
 struct dev_name {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 62781b452fe9..70c58d8b7e86 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -69,6 +69,7 @@ struct hd_struct {
 #define GENHD_FL_DRIVERFS  2
 #define GENHD_FL_DEVFS	4
 #define GENHD_FL_CD	8
+#define GENHD_FL_UP	16
 
 struct gendisk {
 	int major;			/* major number of driver */
@@ -262,7 +263,7 @@ char *disk_name (struct gendisk *hd, int part, char *buf);
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
 extern void update_partition(struct gendisk *disk, int part);
 
-extern struct gendisk *alloc_disk(void);
+extern struct gendisk *alloc_disk(int minors);
 extern void put_disk(struct gendisk *disk);
 
 /* will go away */
-- 
cgit v1.2.3


From 847c633af8a42e49030ff941fb64fb3ece6c95ef Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:23:32 -0700
Subject: [PATCH] disk->minor_shift cleanup

	new field - disk->minors (1 << disk->minor_shift).  Almost all uses
of ->minor_shift had that form and thus have been replaced with ->minors.
---
 drivers/block/acsi.c     |  5 ++++-
 drivers/block/blkpg.c    |  6 +++---
 drivers/block/genhd.c    |  6 +++---
 drivers/ide/ide-cd.c     |  1 +
 drivers/ide/ide-disk.c   |  1 +
 drivers/ide/ide-floppy.c |  1 +
 fs/block_dev.c           |  4 ++--
 fs/partitions/check.c    | 18 +++++++++---------
 include/linux/genhd.h    |  1 +
 9 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c
index 006ff8b23e32..7eb385a51182 100644
--- a/drivers/block/acsi.c
+++ b/drivers/block/acsi.c
@@ -1739,7 +1739,10 @@ int acsi_init( void )
 		sprintf(disk->disk_name, "ad%c", 'a'+i);
 		disk->major = MAJOR_NR;
 		disk->first_minor = i << 4;
-		disk->minor_shift = (acsi_info[i].type==HARDDISK)?4:0;
+		if (acsi_info[i].type != HARDDISK) {
+			disk->minor_shift = 0;
+			disk->minors = 1;
+		}
 		disk->fops = &acsi_fops;
 		set_capacity(disk, acsi_info[i].size);
 		add_disk(disk);
diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c
index 7b55729fa29a..7fff17616401 100644
--- a/drivers/block/blkpg.c
+++ b/drivers/block/blkpg.c
@@ -97,7 +97,7 @@ int add_partition(struct block_device *bdev, struct blkpg_partition *p)
 		return -EINVAL;
 	if (part)
 		BUG();
-	if (p->pno <= 0 || p->pno >= (1 << g->minor_shift))
+	if (p->pno <= 0 || p->pno >= g->minors)
 		return -EINVAL;
 
 	/* partition number in use? */
@@ -105,7 +105,7 @@ int add_partition(struct block_device *bdev, struct blkpg_partition *p)
 		return -EBUSY;
 
 	/* overlap? */
-	for (i = 0; i < (1<<g->minor_shift) - 1; i++)
+	for (i = 0; i < g->minors - 1; i++)
 		if (!(ppstart+pplength <= g->part[i].start_sect ||
 		      ppstart >= g->part[i].start_sect + g->part[i].nr_sects))
 			return -EBUSY;
@@ -142,7 +142,7 @@ int del_partition(struct block_device *bdev, struct blkpg_partition *p)
 		return -EINVAL;
 	if (part)
 		BUG();
-	if (p->pno <= 0 || p->pno >= (1 << g->minor_shift))
+	if (p->pno <= 0 || p->pno >= g->minors)
   		return -EINVAL;
 
 	/* existing drive and partition? */
diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index b230df7f7b70..8ecb1461f43e 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -61,7 +61,7 @@ void add_disk(struct gendisk *disk)
 {
 	write_lock(&gendisk_lock);
 	list_add(&disk->list, &gendisks[disk->major].list);
-	if (disk->minor_shift)
+	if (disk->minors > 1)
 		list_add_tail(&disk->full_list, &gendisk_list);
 	else
 		INIT_LIST_HEAD(&disk->full_list);
@@ -107,7 +107,7 @@ get_gendisk(dev_t dev, int *part)
 		disk = list_entry(p, struct gendisk, list);
 		if (disk->first_minor > minor)
 			continue;
-		if (disk->first_minor + (1<<disk->minor_shift) <= minor)
+		if (disk->first_minor + disk->minors <= minor)
 			continue;
 		read_unlock(&gendisk_lock);
 		*part = minor - disk->first_minor;
@@ -163,7 +163,7 @@ static int show_partition(struct seq_file *part, void *v)
 		sgp->major, sgp->first_minor,
 		(unsigned long long)get_capacity(sgp) >> 1,
 		disk_name(sgp, 0, buf));
-	for (n = 0; n < (1<<sgp->minor_shift) - 1; n++) {
+	for (n = 0; n < sgp->minors - 1; n++) {
 		if (sgp->part[n].nr_sects == 0)
 			continue;
 		seq_printf(part, "%4d  %4d %10llu %s\n",
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 6e5d283aa93a..3471aba90f64 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -3193,6 +3193,7 @@ static int ide_cdrom_attach (ide_drive_t *drive)
 	memset(info, 0, sizeof (struct cdrom_info));
 	drive->driver_data = info;
 	DRIVER(drive)->busy++;
+	g->minors = 1;
 	g->minor_shift = 0;
 	g->de = drive->de;
 	g->flags = GENHD_FL_CD;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 5ff3daf64280..5b0c1ca8e75d 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -1871,6 +1871,7 @@ static int idedisk_attach(ide_drive_t *drive)
 		goto failed;
 	}
 	DRIVER(drive)->busy--;
+	g->minors = 1 << PARTN_BITS;
 	g->minor_shift = PARTN_BITS;
 	g->de = drive->de;
 	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 60e3aed69166..fca1f92f896d 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -2108,6 +2108,7 @@ static int idefloppy_attach (ide_drive_t *drive)
 	DRIVER(drive)->busy++;
 	idefloppy_setup (drive, floppy);
 	DRIVER(drive)->busy--;
+	g->minors = 1 << PARTN_BITS;
 	g->minor_shift = PARTN_BITS;
 	g->de = drive->de;
 	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 981b8df8efc8..47db3ea5e63b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -540,7 +540,7 @@ int check_disk_change(struct block_device *bdev)
 	disk = get_gendisk(bdev->bd_dev, &part);
 	if (bdops->revalidate)
 		bdops->revalidate(dev);
-	if (disk && disk->minor_shift)
+	if (disk && disk->minors > 1)
 		bdev->bd_invalidated = 1;
 	return 1;
 }
@@ -799,7 +799,7 @@ static int blkdev_reread_part(struct block_device *bdev)
 	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
 	int res = 0;
 
-	if (!disk || !disk->minor_shift || bdev != bdev->bd_contains)
+	if (!disk || disk->minors == 1 || bdev != bdev->bd_contains)
 		return -EINVAL;
 	if (part)
 		BUG();
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index b3164b9ca071..72e71ea060e7 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -130,7 +130,7 @@ static DEVICE_ATTR(type,S_IRUGO,partition_device_type_read,NULL);
 
 static void driverfs_create_partitions(struct gendisk *hd)
 {
-	int max_p = 1<<hd->minor_shift;
+	int max_p = hd->minors;
 	struct hd_struct *p = hd->part;
 	char name[DEVICE_NAME_SIZE];
 	char bus_id[BUS_ID_SIZE];
@@ -187,7 +187,7 @@ static void driverfs_create_partitions(struct gendisk *hd)
 
 static void driverfs_remove_partitions(struct gendisk *hd)
 {
-	int max_p = 1<<hd->minor_shift;
+	int max_p = hd->minors;
 	struct device *dev;
 	struct hd_struct *p;
 	int part;
@@ -233,7 +233,7 @@ static void check_partition(struct gendisk *hd, struct block_device *bdev)
 		if (isdigit(state->name[strlen(state->name)-1]))
 			sprintf(state->name, "p");
 	}
-	state->limit = 1<<hd->minor_shift;
+	state->limit = hd->minors;
 	for (i = 0; check_part[i]; i++) {
 		int res, j;
 		struct hd_struct *p;
@@ -298,7 +298,7 @@ static void devfs_create_partitions(struct gendisk *dev)
 	unsigned int devfs_flags = DEVFS_FL_DEFAULT;
 	char dirname[64], symlink[16];
 	static devfs_handle_t devfs_handle;
-	int part, max_p = 1<<dev->minor_shift;
+	int part, max_p = dev->minors;
 	struct hd_struct *p = dev->part;
 
 	if (dev->flags & GENHD_FL_REMOVABLE)
@@ -380,7 +380,7 @@ static void devfs_remove_partitions(struct gendisk *dev)
 {
 #ifdef CONFIG_DEVFS_FS
 	int part;
-	for (part = (1<<dev->minor_shift)-1; part--; ) {
+	for (part = dev->minors-1; part--; ) {
 		devfs_unregister(dev->part[part].de);
 		dev->part[part].de = NULL;
 	}
@@ -401,7 +401,7 @@ void register_disk(struct gendisk *disk)
 		devfs_create_cdrom(disk);
 
 	/* No minors to use for partitions */
-	if (!disk->minor_shift)
+	if (disk->minors == 1)
 		return;
 
 	/* No such device (e.g., media were just removed) */
@@ -458,7 +458,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 	if (res)
 		return res;
 	bdev->bd_invalidated = 0;
-	for (p = 0; p < (1<<disk->minor_shift) - 1; p++) {
+	for (p = 0; p < disk->minors - 1; p++) {
 		disk->part[p].start_sect = 0;
 		disk->part[p].nr_sects = 0;
 	}
@@ -466,7 +466,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 		bdev->bd_op->revalidate(dev);
 	if (get_capacity(disk))
 		check_partition(disk, bdev);
-	for (p = 1; p < (1<<disk->minor_shift); p++)
+	for (p = 1; p < disk->minors; p++)
 		update_partition(disk, p);
 	return res;
 }
@@ -495,7 +495,7 @@ fail:
 
 static int wipe_partitions(struct gendisk *disk)
 {
-	int max_p = 1 << disk->minor_shift;
+	int max_p = disk->minors;
 	kdev_t devp;
 	int res;
 	int p;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 70c58d8b7e86..6e1f68900bba 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -74,6 +74,7 @@ struct hd_struct {
 struct gendisk {
 	int major;			/* major number of driver */
 	int first_minor;
+	int minors;
 	int minor_shift;		/* number of times minor is shifted to
 					   get real minor */
 	char disk_name[16];		/* name of major driver */
-- 
cgit v1.2.3


From c6973580141ce5a4904436b29c0dc5f3d9982951 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:25:13 -0700
Subject: [PATCH] block ioctl cleanup

	guts of blkpg.c and blkdev_ioctl() cleaned up and moved into a new
file - drivers/block/ioctl.c.  blkpg.c is gone.
---
 drivers/block/Makefile    |   2 +-
 drivers/block/blkpg.c     | 310 ----------------------------------------------
 drivers/block/floppy.c    |  10 --
 drivers/block/ioctl.c     | 231 ++++++++++++++++++++++++++++++++++
 drivers/block/rd.c        |   2 -
 drivers/mtd/mtdblock.c    |   2 -
 drivers/mtd/mtdblock_ro.c |   2 -
 drivers/mtd/nftlcore.c    |   1 -
 fs/block_dev.c            |  29 -----
 include/linux/blkpg.h     |   1 -
 include/linux/fs.h        |   1 +
 11 files changed, 233 insertions(+), 358 deletions(-)
 delete mode 100644 drivers/block/blkpg.c
 create mode 100644 drivers/block/ioctl.c

(limited to 'fs')

diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 8457b1bfa13a..6c22bb8963d6 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -11,7 +11,7 @@
 export-objs	:= elevator.o ll_rw_blk.o loop.o genhd.o acsi.o \
 		   scsi_ioctl.o deadline-iosched.o
 
-obj-y	:= elevator.o ll_rw_blk.o blkpg.o genhd.o scsi_ioctl.o deadline-iosched.o
+obj-y	:= elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o deadline-iosched.o
 
 obj-$(CONFIG_MAC_FLOPPY)	+= swim3.o
 obj-$(CONFIG_BLK_DEV_FD)	+= floppy.o
diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c
deleted file mode 100644
index 7fff17616401..000000000000
--- a/drivers/block/blkpg.c
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Partition table and disk geometry handling
- *
- * This obsoletes the partition-handling code in genhd.c:
- * Userspace can look at a disk in arbitrary format and tell
- * the kernel what partitions there are on the disk, and how
- * these should be numbered.
- * It also allows one to repartition a disk that is being used.
- *
- * A single ioctl with lots of subfunctions:
- *
- * Device number stuff:
- *    get_whole_disk()          (given the device number of a partition, find
- *                               the device number of the encompassing disk)
- *    get_all_partitions()      (given the device number of a disk, return the
- *                               device numbers of all its known partitions)
- *
- * Partition stuff:
- *    add_partition()
- *    delete_partition()
- *    test_partition_in_use()   (also for test_disk_in_use)
- *
- * Geometry stuff:
- *    get_geometry()
- *    set_geometry()
- *    get_bios_drivedata()
- *
- * For today, only the partition stuff - aeb, 990515
- */
-
-#include <linux/errno.h>
-#include <linux/fs.h>			/* for BLKROSET, ... */
-#include <linux/sched.h>		/* for capable() */
-#include <linux/blk.h>			/* for set_device_ro() */
-#include <linux/blkpg.h>
-#include <linux/genhd.h>
-#include <linux/module.h>               /* for EXPORT_SYMBOL */
-#include <linux/backing-dev.h>
-#include <linux/buffer_head.h>
-
-#include <asm/uaccess.h>
-
-/*
- * What is the data describing a partition?
- *
- * 1. a device number (kdev_t)
- * 2. a starting sector and number of sectors (hd_struct)
- *    given in the part[] array of the gendisk structure for the drive.
- *
- * The number of sectors is replicated in the sizes[] array of
- * the gendisk structure for the major, which again is copied to
- * the blk_size[][] array.
- * (However, hd_struct has the number of 512-byte sectors,
- *  g->sizes[] and blk_size[][] have the number of 1024-byte blocks.)
- * Note that several drives may have the same major.
- */
-
-/*
- * Add a partition.
- *
- * returns: EINVAL: bad parameters
- *          ENXIO: cannot find drive
- *          EBUSY: proposed partition overlaps an existing one
- *                 or has the same number as an existing one
- *          0: all OK.
- */
-int add_partition(struct block_device *bdev, struct blkpg_partition *p)
-{
-	struct gendisk *g;
-	long long ppstart, pplength;
-	int part, i;
-
-	/* convert bytes to sectors */
-	ppstart = (p->start >> 9);
-	pplength = (p->length >> 9);
-
-	/* check for fit in a hd_struct */ 
-	if (sizeof(sector_t) == sizeof(long) && 
-	    sizeof(long long) > sizeof(long)) {
-		long pstart, plength;
-		pstart = ppstart;
-		plength = pplength;
-		if (pstart != ppstart || plength != pplength
-		    || pstart < 0 || plength < 0)
-			return -EINVAL;
-	}
-
-	/* find the drive major */
-	g = get_gendisk(bdev->bd_dev, &part);
-	if (!g)
-		return -ENXIO;
-
-	/* existing drive? */
-
-	/* drive and partition number OK? */
-	if (bdev != bdev->bd_contains)
-		return -EINVAL;
-	if (part)
-		BUG();
-	if (p->pno <= 0 || p->pno >= g->minors)
-		return -EINVAL;
-
-	/* partition number in use? */
-	if (g->part[p->pno - 1].nr_sects != 0)
-		return -EBUSY;
-
-	/* overlap? */
-	for (i = 0; i < g->minors - 1; i++)
-		if (!(ppstart+pplength <= g->part[i].start_sect ||
-		      ppstart >= g->part[i].start_sect + g->part[i].nr_sects))
-			return -EBUSY;
-
-	/* all seems OK */
-	g->part[p->pno - 1].start_sect = ppstart;
-	g->part[p->pno - 1].nr_sects = pplength;
-	update_partition(g, p->pno);
-	return 0;
-}
-
-/*
- * Delete a partition given by partition number
- *
- * returns: EINVAL: bad parameters
- *          ENXIO: cannot find partition
- *          EBUSY: partition is busy
- *          0: all OK.
- *
- * Note that the dev argument refers to the entire disk, not the partition.
- */
-int del_partition(struct block_device *bdev, struct blkpg_partition *p)
-{
-	struct gendisk *g;
-	struct block_device *bdevp;
-	int part;
-	int holder;
-
-	/* find the drive major */
-	g = get_gendisk(bdev->bd_dev, &part);
-	if (!g)
-		return -ENXIO;
-	if (bdev != bdev->bd_contains)
-		return -EINVAL;
-	if (part)
-		BUG();
-	if (p->pno <= 0 || p->pno >= g->minors)
-  		return -EINVAL;
-
-	/* existing drive and partition? */
-	if (g->part[p->pno - 1].nr_sects == 0)
-		return -ENXIO;
-
-	/* partition in use? Incomplete check for now. */
-	bdevp = bdget(MKDEV(g->major, g->first_minor + p->pno));
-	if (!bdevp)
-		return -ENOMEM;
-	if (bd_claim(bdevp, &holder) < 0) {
-		bdput(bdevp);
-		return -EBUSY;
-	}
-
-	/* all seems OK */
-	fsync_bdev(bdevp);
-	invalidate_bdev(bdevp, 0);
-
-	g->part[p->pno - 1].start_sect = 0;
-	g->part[p->pno - 1].nr_sects = 0;
-	update_partition(g, p->pno);
-	bd_release(bdevp);
-	bdput(bdevp);
-
-	return 0;
-}
-
-int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
-{
-	struct blkpg_ioctl_arg a;
-	struct blkpg_partition p;
-	int len;
-
-	if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
-		return -EFAULT;
-
-	switch (a.op) {
-		case BLKPG_ADD_PARTITION:
-		case BLKPG_DEL_PARTITION:
-			len = a.datalen;
-			if (len < sizeof(struct blkpg_partition))
-				return -EINVAL;
-			if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
-				return -EFAULT;
-			if (!capable(CAP_SYS_ADMIN))
-				return -EACCES;
-			if (a.op == BLKPG_ADD_PARTITION)
-				return add_partition(bdev, &p);
-			else
-				return del_partition(bdev, &p);
-		default:
-			return -EINVAL;
-	}
-}
-
-/*
- * Common ioctl's for block devices
- */
-int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg)
-{
-	request_queue_t *q;
-	u64 ullval = 0;
-	int intval;
-	unsigned short usval;
-	kdev_t dev = to_kdev_t(bdev->bd_dev);
-	int holder;
-	struct backing_dev_info *bdi;
-
-	switch (cmd) {
-		case BLKROSET:
-			if (!capable(CAP_SYS_ADMIN))
-				return -EACCES;
-			if (get_user(intval, (int *)(arg)))
-				return -EFAULT;
-			set_device_ro(dev, intval);
-			return 0;
-		case BLKROGET:
-			intval = (bdev_read_only(bdev) != 0);
-			return put_user(intval, (int *)(arg));
-
-		case BLKRASET:
-		case BLKFRASET:
-			if(!capable(CAP_SYS_ADMIN))
-				return -EACCES;
-			bdi = blk_get_backing_dev_info(bdev);
-			if (bdi == NULL)
-				return -ENOTTY;
-			bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
-			return 0;
-
-		case BLKRAGET:
-		case BLKFRAGET:
-			if (!arg)
-				return -EINVAL;
-			bdi = blk_get_backing_dev_info(bdev);
-			if (bdi == NULL)
-				return -ENOTTY;
-			return put_user((bdi->ra_pages * PAGE_CACHE_SIZE) / 512,
-						(long *)arg);
-
-		case BLKSECTGET:
-			if ((q = bdev_get_queue(bdev)) == NULL)
-				return -EINVAL;
-
-			usval = q->max_sectors;
-			blk_put_queue(q);
-			return put_user(usval, (unsigned short *)arg);
-
-		case BLKFLSBUF:
-			if (!capable(CAP_SYS_ADMIN))
-				return -EACCES;
-			fsync_bdev(bdev);
-			invalidate_bdev(bdev, 0);
-			return 0;
-
-		case BLKSSZGET:
-			/* get block device hardware sector size */
-			intval = bdev_hardsect_size(bdev);
-			return put_user(intval, (int *) arg);
-
-		case BLKGETSIZE: 
-		{
-			unsigned long ret;
-			/* size in sectors, works up to 2 TB */
-			ullval = bdev->bd_inode->i_size;
-			ret = ullval >> 9;
-			if ((u64)ret != (ullval >> 9))
-				return -EFBIG;
-			return put_user(ret, (unsigned long *) arg);
-		}
-		
-		case BLKGETSIZE64:
-			/* size in bytes */
-			ullval = bdev->bd_inode->i_size;
-			return put_user(ullval, (u64 *) arg);
-
-		case BLKPG:
-			return blkpg_ioctl(bdev, (struct blkpg_ioctl_arg *) arg);
-		case BLKBSZGET:
-			/* get the logical block size (cf. BLKSSZGET) */
-			intval = block_size(bdev);
-			return put_user(intval, (int *) arg);
-
-		case BLKBSZSET:
-			/* set the logical block size */
-			if (!capable(CAP_SYS_ADMIN))
-				return -EACCES;
-			if (!arg)
-				return -EINVAL;
-			if (get_user(intval, (int *) arg))
-				return -EFAULT;
-			if (intval > PAGE_SIZE || intval < 512 ||
-			    (intval & (intval - 1)))
-				return -EINVAL;
-			if (bd_claim(bdev, &holder) < 0)
-				return -EBUSY;
-			set_blocksize(bdev, intval);
-			bd_release(bdev);
-			return 0;
-
-		default:
-			return -ENOTTY;
-	}
-}
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 8783ee17314d..3fde460ce7ea 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3488,16 +3488,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 			loc.start = 0;
 			return _COPYOUT(loc);
 		}
-
-		case BLKGETSIZE:
-			ECALL(get_floppy_geometry(drive, type, &g));
-			return put_user(g->size, (unsigned long *) param);
-
-		case BLKGETSIZE64:
-			ECALL(get_floppy_geometry(drive, type, &g));
-			return put_user((u64)g->size << 9, (u64 *) param);
-		/* BLKRRPART is not defined as floppies don't have
-		 * partition tables */
 	}
 
 	/* convert the old style command into a new style command */
diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
new file mode 100644
index 000000000000..fb6a8edb8e21
--- /dev/null
+++ b/drivers/block/ioctl.c
@@ -0,0 +1,231 @@
+#include <linux/sched.h>		/* for capable() */
+#include <linux/blk.h>			/* for set_device_ro() */
+#include <linux/blkpg.h>
+#include <linux/backing-dev.h>
+#include <linux/buffer_head.h>
+#include <asm/uaccess.h>
+/* BLKPG: add or delete partition p.pno on the disk behind bdev; returns 0 or -errno. */
+static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
+{
+	struct block_device *bdevp;
+	int holder;
+	struct gendisk *disk;
+	struct blkpg_ioctl_arg a;
+	struct blkpg_partition p;
+	long long start, length;
+	int part;
+	int i;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
+		return -EFAULT;
+	if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
+		return -EFAULT;
+	disk = get_gendisk(bdev->bd_dev, &part);
+	if (!disk)
+		return -ENXIO;
+	if (bdev != bdev->bd_contains)
+		return -EINVAL;
+	if (part)
+		BUG();
+	part = p.pno;	/* 1-based partition number; its slot is disk->part[part - 1] */
+	if (part <= 0 || part >= disk->minors)
+		return -EINVAL;
+
+	switch (a.op) {
+		case BLKPG_ADD_PARTITION:
+			start = p.start >> 9;	/* bytes -> 512-byte sectors */
+			length = p.length >> 9;
+			/* check for fit in a hd_struct */
+			if (sizeof(sector_t) == sizeof(long) &&
+			    sizeof(long long) > sizeof(long)) {
+				long pstart = start, plength = length;
+				if (pstart != start || plength != length
+				    || pstart < 0 || plength < 0)
+					return -EINVAL;
+			}
+
+			/* partition number in use? */
+			if (disk->part[part - 1].nr_sects != 0)
+				return -EBUSY;
+
+			/* overlap? */
+			for (i = 0; i < disk->minors - 1; i++) {
+				struct hd_struct *s = &disk->part[i];
+				if (!(start+length <= s->start_sect ||
+				      start >= s->start_sect + s->nr_sects))
+					return -EBUSY;
+			}
+			/* all seems OK */
+			disk->part[part - 1].start_sect = start;
+			disk->part[part - 1].nr_sects = length;
+			update_partition(disk, part);
+			return 0;
+		case BLKPG_DEL_PARTITION:
+			if (disk->part[part - 1].nr_sects == 0)
+				return -ENXIO;
+
+			/* partition in use? Incomplete check for now. */
+			bdevp = bdget(MKDEV(disk->major, disk->first_minor) + part);
+			if (!bdevp)
+				return -ENOMEM;
+			if (bd_claim(bdevp, &holder) < 0) {
+				bdput(bdevp);
+				return -EBUSY;
+			}
+
+			/* all seems OK: flush, then clear the same slot that was tested above */
+			fsync_bdev(bdevp);
+			invalidate_bdev(bdevp, 0);
+
+			disk->part[part - 1].start_sect = 0;
+			disk->part[part - 1].nr_sects = 0;
+			update_partition(disk, part);
+			bd_release(bdevp);
+			bdput(bdevp);
+			return 0;
+		default:
+			return -EINVAL;
+	}
+}
+/* BLKRRPART: rescan the partitions of bdev's disk; returns rescan_partitions()'s result or -errno. */
+static int blkdev_reread_part(struct block_device *bdev)
+{
+	int part;
+	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
+	int res = 0;
+
+	if (!disk || disk->minors == 1 || bdev != bdev->bd_contains)
+		return -EINVAL;	/* no disk, a single minor only, or bdev is not its own bd_contains */
+	if (part)
+		BUG();
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (down_trylock(&bdev->bd_sem))
+		return -EBUSY;	/* bd_sem could not be taken right now */
+	res = rescan_partitions(disk, bdev);
+	up(&bdev->bd_sem);
+	return res;
+}
+/* Pass val to put_user() with arg treated as an unsigned short pointer; returns its result. */
+static int put_ushort(unsigned long arg, unsigned short val)
+{
+	return put_user(val, (unsigned short *)arg);
+}
+/* Pass val to put_user() with arg treated as an int pointer; returns its result. */
+static int put_int(unsigned long arg, int val)
+{
+	return put_user(val, (int *)arg);
+}
+/* Pass val to put_user() with arg treated as a long pointer; returns its result. */
+static int put_long(unsigned long arg, long val)
+{
+	return put_user(val, (long *)arg);
+}
+/* Pass val to put_user() with arg treated as an unsigned long pointer; returns its result. */
+static int put_ulong(unsigned long arg, unsigned long val)
+{
+	return put_user(val, (unsigned long *)arg);
+}
+/* Pass val to put_user() with arg treated as a u64 pointer; returns its result. */
+static int put_u64(unsigned long arg, u64 val)
+{
+	return put_user(val, (u64 *)arg);
+}
+/* Block-device ioctl dispatch; BLKFLSBUF, BLKROSET and unknown cmds are offered to bd_op->ioctl. */
+int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
+			unsigned long arg)
+{
+	struct block_device *bdev = inode->i_bdev;
+	struct backing_dev_info *bdi;
+	int holder;
+	int ret, n;
+
+	switch (cmd) {
+	case BLKELVGET:
+	case BLKELVSET:
+		/* deprecated, use the /proc/iosched interface instead */
+		return -ENOTTY;
+	case BLKRAGET:
+	case BLKFRAGET:	/* ra_pages is reported in 512-byte units */
+		if (!arg)
+			return -EINVAL;
+		bdi = blk_get_backing_dev_info(bdev);
+		if (bdi == NULL)
+			return -ENOTTY;
+		return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
+	case BLKROGET:
+		return put_int(arg, bdev_read_only(bdev) != 0);
+	case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
+		return put_int(arg, block_size(bdev));
+	case BLKSSZGET: /* get block device hardware sector size */
+		return put_int(arg, bdev_hardsect_size(bdev));
+	case BLKSECTGET:	/* NOTE(review): bd_queue is dereferenced unchecked - confirm it cannot be NULL */
+		return put_ushort(arg, bdev->bd_queue->max_sectors);
+	case BLKRASET:
+	case BLKFRASET:	/* arg is taken in 512-byte units and stored as pages */
+		if(!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		bdi = blk_get_backing_dev_info(bdev);
+		if (bdi == NULL)
+			return -ENOTTY;
+		bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
+		return 0;
+	case BLKBSZSET:
+		/* set the logical block size */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		if (!arg)
+			return -EINVAL;
+		if (get_user(n, (int *) arg))
+			return -EFAULT;
+		if (n > PAGE_SIZE || n < 512 || (n & (n - 1)))	/* power of two in [512, PAGE_SIZE] only */
+			return -EINVAL;
+		if (bd_claim(bdev, &holder) < 0)
+			return -EBUSY;
+		set_blocksize(bdev, n);
+		bd_release(bdev);
+		return 0;
+	case BLKPG:
+		return blkpg_ioctl(bdev, (struct blkpg_ioctl_arg *) arg);
+	case BLKRRPART:
+		return blkdev_reread_part(bdev);
+	case BLKGETSIZE:	/* size as i_size >> 9; -EFBIG if that exceeds an unsigned long */
+		if ((bdev->bd_inode->i_size >> 9) > ~0UL)
+			return -EFBIG;
+		return put_ulong(arg, bdev->bd_inode->i_size >> 9);
+	case BLKGETSIZE64:	/* i_size, unshifted */
+		return put_u64(arg, bdev->bd_inode->i_size);
+	case BLKFLSBUF:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		if (bdev->bd_op->ioctl) {	/* driver goes first; its -EINVAL selects the generic path below */
+			ret = bdev->bd_op->ioctl(inode, file, cmd, arg);
+			if (ret != -EINVAL)
+				return ret;
+		}
+		fsync_bdev(bdev);
+		invalidate_bdev(bdev, 0);
+		return 0;
+	case BLKROSET:
+		if (bdev->bd_op->ioctl) {	/* driver is asked before the capability check here */
+			ret = bdev->bd_op->ioctl(inode, file, cmd, arg);
+			if (ret != -EINVAL)
+				return ret;
+		}
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		if (get_user(n, (int *)(arg)))
+			return -EFAULT;
+		set_device_ro(to_kdev_t(bdev->bd_dev), n);
+		return 0;
+	default:	/* not handled here: ask the driver; its -EINVAL is reported as -ENOTTY */
+		if (bdev->bd_op->ioctl) {
+			ret = bdev->bd_op->ioctl(inode, file, cmd, arg);
+			if (ret != -EINVAL)
+				return ret;
+		}
+	}
+	return -ENOTTY;
+}
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 391664b9a34f..7d72b786080c 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -291,8 +291,6 @@ static int rd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, un
 	if (cmd != BLKFLSBUF)
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
 	/* special: we want to release the ramdisk memory,
 	   it's not like with the other blockdevices where
 	   this ioctl only flushes away the buffer cache. */
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 6b32d3cfb390..a39bcab25891 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -517,8 +517,6 @@ static int mtdblock_ioctl(struct inode * inode, struct file * file,
 
 	switch (cmd) {
 	case BLKFLSBUF:
-		if(!capable(CAP_SYS_ADMIN))
-			return -EACCES;
 		fsync_bdev(inode->i_bdev);
 		invalidate_bdev(inode->i_bdev, 0);
 		down(&mtdblk->cache_sem);
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index 97e8437a75d5..1878f540f3b6 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -201,8 +201,6 @@ static int mtdblock_ioctl(struct inode * inode, struct file * file,
 	if (!mtd || cmd != BLKFLSBUF)
 		return -EINVAL;
 
-	if(!capable(CAP_SYS_ADMIN))
-		return -EACCES;
 	fsync_bdev(inode->i_bdev);
 	invalidate_bdev(inode->i_bdev, 0);
 	if (mtd->sync)
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 155aa92a9429..292894af8252 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -770,7 +770,6 @@ static int nftl_ioctl(struct inode * inode, struct file * file, unsigned int cmd
 		return copy_to_user((void *)arg, &g, sizeof g) ? -EFAULT : 0;
 	}
 	case BLKFLSBUF:
-		if (!capable(CAP_SYS_ADMIN)) return -EACCES;
 		fsync_bdev(inode->i_bdev);
 		invalidate_bdev(inode->i_bdev, 0);
 		if (nftl->mtd->sync)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 47db3ea5e63b..dff0244e63a6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -793,25 +793,6 @@ int blkdev_close(struct inode * inode, struct file * filp)
 	return blkdev_put(inode->i_bdev, BDEV_FILE);
 }
 
-static int blkdev_reread_part(struct block_device *bdev)
-{
-	int part;
-	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
-	int res = 0;
-
-	if (!disk || disk->minors == 1 || bdev != bdev->bd_contains)
-		return -EINVAL;
-	if (part)
-		BUG();
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-	if (down_trylock(&bdev->bd_sem))
-		return -EBUSY;
-	res = rescan_partitions(disk, bdev);
-	up(&bdev->bd_sem);
-	return res;
-}
-
 static ssize_t blkdev_file_write(struct file *file, const char *buf,
 				   size_t count, loff_t *ppos)
 {
@@ -820,16 +801,6 @@ static ssize_t blkdev_file_write(struct file *file, const char *buf,
 	return generic_file_write_nolock(file, &local_iov, 1, ppos);
 }
 
-static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
-			unsigned long arg)
-{
-	struct block_device *bdev = inode->i_bdev;
-	int ret = blk_ioctl(bdev, cmd, arg);
-	if (ret == -ENOTTY && bdev->bd_op->ioctl)
-		ret = bdev->bd_op->ioctl(inode, file, cmd, arg);
-	return ret;
-}
-
 struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.writepage	= blkdev_writepage,
diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h
index 3cfedb07f803..571618972e30 100644
--- a/include/linux/blkpg.h
+++ b/include/linux/blkpg.h
@@ -57,7 +57,6 @@ struct blkpg_partition {
 #ifdef __KERNEL__
 
 extern char * partition_name(dev_t dev);
-extern int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg);
 
 #endif /* __KERNEL__ */
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 93148f1659b0..cac13f931cec 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1087,6 +1087,7 @@ extern struct file_operations def_blk_fops;
 extern struct address_space_operations def_blk_aops;
 extern struct file_operations def_fifo_fops;
 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
+extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
 extern int blkdev_get(struct block_device *, mode_t, unsigned, int);
 extern int blkdev_put(struct block_device *, int);
 extern int bd_claim(struct block_device *, void *);
-- 
cgit v1.2.3


From afae25b7c8d594f6349e81dce2b16ce44aa9f0ed Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:25:18 -0700
Subject: [PATCH] preparation to use of driverfs refcounts, part 1 - partitions

	* update_partition() split into add_partition() and delete_partition().
	* all updating of ->part[] is switched to these two (including initial
filling/final cleaning).
	* per-partition devices are allocated on-demand and never reused.
We allocate struct device in add_partition() and put reference to it into
hd_struct.  ->release() for that struct device frees it.  delete_partition()
removes reference from hd_struct and does put_device() on it.  Basically,
we get rid of problems with reused struct device by never reusing them...
	At that point devices for partitions are nice and sane.
---
 drivers/block/ioctl.c |   8 +-
 fs/partitions/check.c | 289 ++++++++++++++++++++++----------------------------
 include/linux/genhd.h |   5 +-
 3 files changed, 134 insertions(+), 168 deletions(-)

(limited to 'fs')

diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
index fb6a8edb8e21..4af05bc32db2 100644
--- a/drivers/block/ioctl.c
+++ b/drivers/block/ioctl.c
@@ -58,9 +58,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 					return -EBUSY;
 			}
 			/* all seems OK */
-			disk->part[part - 1].start_sect = start;
-			disk->part[part - 1].nr_sects = length;
-			update_partition(disk, part);
+			add_partition(disk, part, start, length);
 			return 0;
 		case BLKPG_DEL_PARTITION:
 			if (disk->part[part - 1].nr_sects == 0)
@@ -79,9 +77,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 			fsync_bdev(bdevp);
 			invalidate_bdev(bdevp, 0);
 
-			disk->part[part].start_sect = 0;
-			disk->part[part].nr_sects = 0;
-			update_partition(disk, part);
+			delete_partition(disk, part);
 			bd_release(bdevp);
 			bdput(bdevp);
 			return 0;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 72e71ea060e7..a61a83ded312 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -130,96 +130,49 @@ static DEVICE_ATTR(type,S_IRUGO,partition_device_type_read,NULL);
 
 static void driverfs_create_partitions(struct gendisk *hd)
 {
-	int max_p = hd->minors;
-	struct hd_struct *p = hd->part;
-	char name[DEVICE_NAME_SIZE];
-	char bus_id[BUS_ID_SIZE];
-	struct device *dev, *parent;
-	int part;
+	struct device *parent = hd->driverfs_dev;
+	struct device *dev = &hd->disk_dev;
 
 	/* if driverfs not supported by subsystem, skip partitions */
 	if (!(hd->flags & GENHD_FL_DRIVERFS))
 		return;
 
-	parent = hd->driverfs_dev;
-
 	if (parent)  {
-		sprintf(name, "%s", parent->name);
-		sprintf(bus_id, "%s:", parent->bus_id);
+		sprintf(dev->name, "%sdisc", parent->name);
+		sprintf(dev->bus_id, "%sdisc", parent->bus_id);
+		dev->parent = parent;
+		dev->bus = parent->bus;
 	} else {
-		*name = *bus_id = '\0';
+		sprintf(dev->name, "disc");
+		sprintf(dev->bus_id, "disc");
 	}
-
-	dev = &hd->disk_dev;
 	dev->driver_data = (void *)(long)__mkdev(hd->major, hd->first_minor);
-	sprintf(dev->name, "%sdisc", name);
-	sprintf(dev->bus_id, "%sdisc", bus_id);
-	for (part=1; part < max_p; part++) {
-		dev = &p[part-1].hd_driverfs_dev;
-		sprintf(dev->name, "%spart%d", name, part);
-		sprintf(dev->bus_id, "%s:p%d", bus_id, part);
-		if (!p[part-1].nr_sects)
-			continue;
-		dev->driver_data =
-				(void *)(long)__mkdev(hd->major, hd->first_minor+part);
-	}
-
-	dev = &hd->disk_dev;
-	dev->parent = parent;
-	if (parent)
-		dev->bus = parent->bus;
 	device_register(dev);
 	device_create_file(dev, &dev_attr_type);
 	device_create_file(dev, &dev_attr_kdev);
-
-	for (part=0; part < max_p-1; part++) {
-		dev = &p[part].hd_driverfs_dev;
-		dev->parent = parent;
-		if (parent)
-			dev->bus = parent->bus;
-		if (!dev->driver_data)
-			continue;
-		device_register(dev);
-		device_create_file(dev, &dev_attr_type);
-		device_create_file(dev, &dev_attr_kdev);
-	}
 }
 
 static void driverfs_remove_partitions(struct gendisk *hd)
 {
-	int max_p = hd->minors;
-	struct device *dev;
-	struct hd_struct *p;
-	int part;
-
-	for (part=1, p = hd->part; part < max_p; part++, p++) {
-		dev = &p->hd_driverfs_dev;
-		if (dev->driver_data) {
-			device_remove_file(dev, &dev_attr_type);
-			device_remove_file(dev, &dev_attr_kdev);
-			put_device(dev);	
-			dev->driver_data = NULL;
-		}
-	}
-	dev = &hd->disk_dev;
-	if (dev->driver_data) {
-		device_remove_file(dev, &dev_attr_type);
-		device_remove_file(dev, &dev_attr_kdev);
-		put_device(dev);	
-		dev->driver_data = NULL;
-	}
+	struct device *dev = &hd->disk_dev;
+	if (!(hd->flags & GENHD_FL_DRIVERFS))
+		return;
+	device_remove_file(dev, &dev_attr_type);
+	device_remove_file(dev, &dev_attr_kdev);
+	put_device(dev);	
 }
 
-static void check_partition(struct gendisk *hd, struct block_device *bdev)
+static struct parsed_partitions *
+check_partition(struct gendisk *hd, struct block_device *bdev)
 {
+	struct parsed_partitions *state;
 	devfs_handle_t de = NULL;
 	char buf[64];
-	struct parsed_partitions *state;
-	int i;
+	int i, res;
 
 	state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
 	if (!state)
-		return;
+		return NULL;
 
 	if (hd->flags & GENHD_FL_DEVFS)
 		de = hd->de;
@@ -234,31 +187,19 @@ static void check_partition(struct gendisk *hd, struct block_device *bdev)
 			sprintf(state->name, "p");
 	}
 	state->limit = hd->minors;
-	for (i = 0; check_part[i]; i++) {
-		int res, j;
-		struct hd_struct *p;
+	i = res = 0;
+	while (!res && check_part[i]) {
 		memset(&state->parts, 0, sizeof(state->parts));
-		res = check_part[i](state, bdev);
-		if (!res)
-			continue;
-		if (res < 0) {
-			if (warn_no_part)
-				printk(" unable to read partition table\n");
-			return;
-		} 
-		p = hd->part;
-		for (j = 1; j < state->limit; j++) {
-			p[j-1].start_sect = state->parts[j].from;
-			p[j-1].nr_sects = state->parts[j].size;
-#if CONFIG_BLK_DEV_MD
-			if (!state->parts[j].flags)
-				continue;
-			md_autodetect_dev(bdev->bd_dev+j);
-#endif
-		}
-		return;
+		res = check_part[i++](state, bdev);
 	}
-	printk(" unknown partition table\n");
+	if (res > 0)
+		return state;
+	if (!res)
+		printk(" unknown partition table\n");
+	else if (warn_no_part)
+		printk(" unable to read partition table\n");
+	kfree(state);
+	return NULL;
 }
 
 static void devfs_register_partition(struct gendisk *dev, int part)
@@ -329,9 +270,6 @@ static void devfs_create_partitions(struct gendisk *dev)
 	devfs_auto_unregister(dev->disk_de, slave);
 	if (!(dev->flags & GENHD_FL_DEVFS))
 		devfs_auto_unregister (slave, dir);
-	for (part = 1; part < max_p; part++, p++)
-		if (p->nr_sects)
-			devfs_register_partition(dev, part);
 #endif
 }
 
@@ -379,11 +317,6 @@ static void devfs_create_cdrom(struct gendisk *dev)
 static void devfs_remove_partitions(struct gendisk *dev)
 {
 #ifdef CONFIG_DEVFS_FS
-	int part;
-	for (part = dev->minors-1; part--; ) {
-		devfs_unregister(dev->part[part].de);
-		dev->part[part].de = NULL;
-	}
 	devfs_unregister(dev->disk_de);
 	dev->disk_de = NULL;
 	if (dev->flags & GENHD_FL_CD)
@@ -393,10 +326,69 @@ static void devfs_remove_partitions(struct gendisk *dev)
 #endif
 }
 
+void delete_partition(struct gendisk *disk, int part)
+{
+	struct hd_struct *p = disk->part + part - 1;
+	struct device *dev;
+	if (!p->nr_sects)
+		return;
+	p->start_sect = 0;
+	p->nr_sects = 0;
+	devfs_unregister(p->de);
+	dev = p->hd_driverfs_dev;
+	p->hd_driverfs_dev = NULL;
+	if (dev) {
+		device_remove_file(dev, &dev_attr_type);
+		device_remove_file(dev, &dev_attr_kdev);
+		device_unregister(dev);	
+	}
+}
+
+static void part_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
+{
+	struct hd_struct *p = disk->part + part - 1;
+	struct device *parent = disk->disk_dev.parent;
+	struct device *dev;
+
+	p->start_sect = start;
+	p->nr_sects = len;
+	devfs_register_partition(disk, part);
+	if (!(disk->flags & GENHD_FL_DRIVERFS))
+		return;
+	dev = kmalloc(sizeof(struct device), GFP_KERNEL);
+	if (!dev)
+		return;
+	memset(dev, 0, sizeof(struct device));
+	if (parent)  {
+		sprintf(dev->name, "%spart%d", parent->name, part);
+		sprintf(dev->bus_id, "%s:p%d", parent->bus_id, part);
+		dev->parent = parent;
+		dev->bus = parent->bus;
+	} else {
+		sprintf(dev->name, "part%d", part);
+		sprintf(dev->bus_id, "p%d", part);
+	}
+	dev->release = part_release;
+	dev->driver_data =
+		(void *)(long)__mkdev(disk->major, disk->first_minor+part);
+	device_register(dev);
+	device_create_file(dev, &dev_attr_type);
+	device_create_file(dev, &dev_attr_kdev);
+	p->hd_driverfs_dev = dev;
+}
+
 /* Not exported, helper to add_disk(). */
 void register_disk(struct gendisk *disk)
 {
+	struct parsed_partitions *state;
 	struct block_device *bdev;
+	int j;
+
 	if (disk->flags & GENHD_FL_CD)
 		devfs_create_cdrom(disk);
 
@@ -411,45 +403,33 @@ void register_disk(struct gendisk *disk)
 	bdev = bdget(MKDEV(disk->major, disk->first_minor));
 	if (blkdev_get(bdev, FMODE_READ, 0, BDEV_RAW) < 0)
 		return;
-	check_partition(disk, bdev);
+	state = check_partition(disk, bdev);
 	driverfs_create_partitions(disk);
 	devfs_create_partitions(disk);
-	blkdev_put(bdev, BDEV_RAW);
-}
-
-void update_partition(struct gendisk *disk, int part)
-{
-	struct hd_struct *p = disk->part + part - 1;
-	struct device *dev = &p->hd_driverfs_dev;
-
-	if (!p->nr_sects) {
-		if (p->de) {
-			devfs_unregister(p->de);
-			p->de = NULL;
-		}
-		if (dev->driver_data) {
-			device_remove_file(dev, &dev_attr_type);
-			device_remove_file(dev, &dev_attr_kdev);
-			put_device(dev);	
-			dev->driver_data = NULL;
+	if (state) {
+		for (j = 1; j < state->limit; j++) {
+			sector_t size = state->parts[j].size;
+			sector_t from = state->parts[j].from;
+			if (!size)
+				continue;
+			add_partition(disk, j, from, size);
+#if CONFIG_BLK_DEV_MD
+			if (!state->parts[j].flags)
+				continue;
+			md_autodetect_dev(bdev->bd_dev+j);
+#endif
 		}
-		return;
+		kfree(state);
 	}
-	if (!p->de)
-		devfs_register_partition(disk, part);
-	if (dev->driver_data || !(disk->flags & GENHD_FL_DRIVERFS))
-		return;
-	dev->driver_data =
-		(void *)(long)__mkdev(disk->major, disk->first_minor+part);
-	device_register(dev);
-	device_create_file(dev, &dev_attr_type);
-	device_create_file(dev, &dev_attr_kdev);
+	blkdev_put(bdev, BDEV_RAW);
 }
 
 int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 {
 	kdev_t dev = to_kdev_t(bdev->bd_dev);
+	struct parsed_partitions *state;
 	int p, res;
+
 	if (!bdev->bd_invalidated)
 		return 0;
 	if (bdev->bd_part_count)
@@ -458,16 +438,25 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 	if (res)
 		return res;
 	bdev->bd_invalidated = 0;
-	for (p = 0; p < disk->minors - 1; p++) {
-		disk->part[p].start_sect = 0;
-		disk->part[p].nr_sects = 0;
-	}
+	for (p = 1; p < disk->minors; p++)
+		delete_partition(disk, p);
 	if (bdev->bd_op->revalidate)
 		bdev->bd_op->revalidate(dev);
-	if (get_capacity(disk))
-		check_partition(disk, bdev);
-	for (p = 1; p < disk->minors; p++)
-		update_partition(disk, p);
+	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
+		return res;
+	for (p = 1; p < state->limit; p++) {
+		sector_t size = state->parts[p].size;
+		sector_t from = state->parts[p].from;
+		if (!size)
+			continue;
+		add_partition(disk, p, from, size);
+#if CONFIG_BLK_DEV_MD
+		if (!state->parts[p].flags)	/* only flagged partitions go to md autodetect */
+			continue;
+		md_autodetect_dev(bdev->bd_dev+p);
+#endif
+	}
+	kfree(state);
 	return res;
 }
 
@@ -493,45 +482,25 @@ fail:
 	return NULL;
 }
 
-static int wipe_partitions(struct gendisk *disk)
+void del_gendisk(struct gendisk *disk)
 {
 	int max_p = disk->minors;
 	kdev_t devp;
-	int res;
 	int p;
 
 	/* invalidate stuff */
 	for (p = max_p - 1; p > 0; p--) {
 		devp = mk_kdev(disk->major,disk->first_minor + p);
-#if 0					/* %%% superfluous? */
-		if (disk->part[p-1].nr_sects == 0)
-			continue;
-#endif
-		res = invalidate_device(devp, 1);
-		if (res)
-			return res;
-		disk->part[p-1].start_sect = 0;
-		disk->part[p-1].nr_sects = 0;
+		invalidate_device(devp, 1);
+		delete_partition(disk, p);
 	}
 	devp = mk_kdev(disk->major,disk->first_minor);
-#if 0					/* %%% superfluous? */
-	if (disk->part[p].nr_sects == 0)
-		continue;
-#endif
-	res = invalidate_device(devp, 1);
-	if (res)
-		return res;
+	invalidate_device(devp, 1);
 	disk->capacity = 0;
-	return 0;
-}
-
-void del_gendisk(struct gendisk *disk)
-{
-	driverfs_remove_partitions(disk);
-	wipe_partitions(disk);
+	disk->flags &= ~GENHD_FL_UP;
 	unlink_gendisk(disk);
+	driverfs_remove_partitions(disk);
 	devfs_remove_partitions(disk);
-	disk->flags &= ~GENHD_FL_UP;
 }
 
 struct dev_name {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6e1f68900bba..6b859fad6a8a 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -62,7 +62,7 @@ struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
 	devfs_handle_t de;              /* primary (master) devfs entry  */
-	struct device hd_driverfs_dev;  /* support driverfs hiearchy     */
+	struct device *hd_driverfs_dev;  /* support driverfs hiearchy     */
 };
 
 #define GENHD_FL_REMOVABLE  1
@@ -262,7 +262,8 @@ struct unixware_disklabel {
 char *disk_name (struct gendisk *hd, int part, char *buf);
 
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
-extern void update_partition(struct gendisk *disk, int part);
+extern void add_partition(struct gendisk *, int, sector_t, sector_t);
+extern void delete_partition(struct gendisk *, int);
 
 extern struct gendisk *alloc_disk(int minors);
 extern void put_disk(struct gendisk *disk);
-- 
cgit v1.2.3


From b288f6add39cf474fc2ec8087d32d3e1d4c1c6d0 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:25:24 -0700
Subject: [PATCH] preparation to use of driverfs refcounts, part 2 - disk

	* disk->disk_dev is initialized in alloc_disk(), device_add()'d in
	  register_disk() (the helper of add_disk()), device_del()'d in
	  del_gendisk() and put_device()'d in put_disk().
	* devices of partitions are made its children.
	* attributes of disk one: dev (dev_t of the thing), range (number of
	  minors) and size (in sectors).
	* attributes of partition ones: dev (ditto), start (in sectors) and
	  size (in sectors).
	* disk devices are put on a new bus - "block"
	* if caller of add_disk() had set disk->driverfs_dev, we set symlinks:
	  "device" from disk to underlying device and "block" from underlying
	  device to disk.
	* ->release() of disk_dev frees disk and disk->part.
	At that point we have sane driverfs subtree for each gendisk and
refcount of its root (disk->disk_dev) can act as gendisk refcount.
---
 drivers/block/genhd.c    |  23 ++++-
 drivers/ide/ide-cd.c     |   1 +
 drivers/ide/ide-disk.c   |   1 +
 drivers/ide/ide-floppy.c |   1 +
 drivers/scsi/sr.c        |  34 +-------
 fs/partitions/check.c    | 221 ++++++++++++++++++++++++++++++++---------------
 include/linux/cdrom.h    |   1 -
 7 files changed, 173 insertions(+), 109 deletions(-)

(limited to 'fs')

diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index 8ecb1461f43e..ecb2dcdf214d 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -192,6 +192,10 @@ struct device_class disk_devclass = {
 	.name		= "disk",
 };
 
+static struct bus_type disk_bus = {
+	name:		"block",
+};
+
 int __init device_init(void)
 {
 	int i;
@@ -200,6 +204,7 @@ int __init device_init(void)
 		INIT_LIST_HEAD(&gendisks[i].list);
 	blk_dev_init();
 	devclass_register(&disk_devclass);
+	bus_register(&disk_bus);
 	return 0;
 }
 
@@ -207,6 +212,13 @@ __initcall(device_init);
 
 EXPORT_SYMBOL(disk_devclass);
 
+static void disk_release(struct device *dev)
+{
+	struct gendisk *disk = dev->driver_data;
+	kfree(disk->part);
+	kfree(disk);
+}
+
 struct gendisk *alloc_disk(int minors)
 {
 	struct gendisk *disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
@@ -224,16 +236,19 @@ struct gendisk *alloc_disk(int minors)
 		disk->minors = minors;
 		while (minors >>= 1)
 			disk->minor_shift++;
+		disk->disk_dev.bus = &disk_bus;
+		disk->disk_dev.release = disk_release;
+		disk->disk_dev.driver_data = disk;
+		device_initialize(&disk->disk_dev);
 	}
 	return disk;
 }
 
 void put_disk(struct gendisk *disk)
 {
-	if (disk) {
-		kfree(disk->part);
-		kfree(disk);
-	}
+	if (disk)
+		put_device(&disk->disk_dev);
 }
+
 EXPORT_SYMBOL(alloc_disk);
 EXPORT_SYMBOL(put_disk);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 3471aba90f64..8fffe423ab14 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -3196,6 +3196,7 @@ static int ide_cdrom_attach (ide_drive_t *drive)
 	g->minors = 1;
 	g->minor_shift = 0;
 	g->de = drive->de;
+	g->driverfs_dev = &drive->gendev;
 	g->flags = GENHD_FL_CD;
 	if (ide_cdrom_setup(drive)) {
 		struct cdrom_device_info *devinfo = &info->devinfo;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 5b0c1ca8e75d..aecd9a7de7ed 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -1874,6 +1874,7 @@ static int idedisk_attach(ide_drive_t *drive)
 	g->minors = 1 << PARTN_BITS;
 	g->minor_shift = PARTN_BITS;
 	g->de = drive->de;
+	g->driverfs_dev = &drive->gendev;
 	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
 	g->flags |= GENHD_FL_DEVFS;
 	set_capacity(g, current_capacity(drive));
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index fca1f92f896d..f10543ba3d8f 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -2110,6 +2110,7 @@ static int idefloppy_attach (ide_drive_t *drive)
 	DRIVER(drive)->busy--;
 	g->minors = 1 << PARTN_BITS;
 	g->minor_shift = PARTN_BITS;
+	g->driverfs_dev = &drive->gendev;
 	g->de = drive->de;
 	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
 	g->flags |= GENHD_FL_DEVFS;
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 05fe1b938eb4..39af5cce16f0 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -726,24 +726,6 @@ cleanup_dev:
 	return 1;
 }
 
-/* Driverfs file support */
-static ssize_t sr_device_kdev_read(struct device *driverfs_dev, 
-				   char *page, size_t count, loff_t off)
-{
-	kdev_t kdev; 
-	kdev.value=(int)(long)driverfs_dev->driver_data;
-	return off ? 0 : sprintf(page, "%x\n",kdev.value);
-}
-static DEVICE_ATTR(kdev,S_IRUGO,sr_device_kdev_read,NULL);
-
-static ssize_t sr_device_type_read(struct device *driverfs_dev, 
-				   char *page, size_t count, loff_t off) 
-{
-	return off ? 0 : sprintf (page, "CHR\n");
-}
-static DEVICE_ATTR(type,S_IRUGO,sr_device_type_read,NULL);
-
-
 void sr_finish()
 {
 	int i;
@@ -797,22 +779,8 @@ void sr_finish()
 		 */
 		get_capabilities(cd);
 		sr_vendor_init(cd);
-
-		sprintf(cd->cdi.cdrom_driverfs_dev.bus_id, "%s:cd",
-			cd->device->sdev_driverfs_dev.bus_id);
-		sprintf(cd->cdi.cdrom_driverfs_dev.name, "%scdrom",
-			cd->device->sdev_driverfs_dev.name);
-		cd->cdi.cdrom_driverfs_dev.parent = 
-			&cd->device->sdev_driverfs_dev;
-		cd->cdi.cdrom_driverfs_dev.bus = &scsi_driverfs_bus_type;
-		cd->cdi.cdrom_driverfs_dev.driver_data = 
-			(void *)(long)__mkdev(MAJOR_NR, i);
-		device_register(&cd->cdi.cdrom_driverfs_dev);
-		device_create_file(&cd->cdi.cdrom_driverfs_dev,
-				   &dev_attr_type);
-		device_create_file(&cd->cdi.cdrom_driverfs_dev,
-				   &dev_attr_kdev);
 		disk->de = cd->device->de;
+		disk->driverfs_dev = &cd->device->sdev_driverfs_dev;
 		register_cdrom(&cd->cdi);
 		set_capacity(disk, cd->capacity);
 		add_disk(disk);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index a61a83ded312..5fc23d047567 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -18,6 +18,7 @@
 #include <linux/blk.h>
 #include <linux/kmod.h>
 #include <linux/ctype.h>
+#include <../drivers/base/fs/fs.h>	/* Eeeeewwwww */
 
 #include "check.h"
 
@@ -111,57 +112,6 @@ char *disk_name(struct gendisk *hd, int part, char *buf)
 	return buf;
 }
 
-/* Driverfs file support */
-static ssize_t partition_device_kdev_read(struct device *driverfs_dev, 
-			char *page, size_t count, loff_t off)
-{
-	kdev_t kdev; 
-	kdev.value=(int)(long)driverfs_dev->driver_data;
-	return off ? 0 : sprintf (page, "%x\n",kdev.value);
-}
-static DEVICE_ATTR(kdev,S_IRUGO,partition_device_kdev_read,NULL);
-
-static ssize_t partition_device_type_read(struct device *driverfs_dev, 
-			char *page, size_t count, loff_t off) 
-{
-	return off ? 0 : sprintf (page, "BLK\n");
-}
-static DEVICE_ATTR(type,S_IRUGO,partition_device_type_read,NULL);
-
-static void driverfs_create_partitions(struct gendisk *hd)
-{
-	struct device *parent = hd->driverfs_dev;
-	struct device *dev = &hd->disk_dev;
-
-	/* if driverfs not supported by subsystem, skip partitions */
-	if (!(hd->flags & GENHD_FL_DRIVERFS))
-		return;
-
-	if (parent)  {
-		sprintf(dev->name, "%sdisc", parent->name);
-		sprintf(dev->bus_id, "%sdisc", parent->bus_id);
-		dev->parent = parent;
-		dev->bus = parent->bus;
-	} else {
-		sprintf(dev->name, "disc");
-		sprintf(dev->bus_id, "disc");
-	}
-	dev->driver_data = (void *)(long)__mkdev(hd->major, hd->first_minor);
-	device_register(dev);
-	device_create_file(dev, &dev_attr_type);
-	device_create_file(dev, &dev_attr_kdev);
-}
-
-static void driverfs_remove_partitions(struct gendisk *hd)
-{
-	struct device *dev = &hd->disk_dev;
-	if (!(hd->flags & GENHD_FL_DRIVERFS))
-		return;
-	device_remove_file(dev, &dev_attr_type);
-	device_remove_file(dev, &dev_attr_kdev);
-	put_device(dev);	
-}
-
 static struct parsed_partitions *
 check_partition(struct gendisk *hd, struct block_device *bdev)
 {
@@ -326,6 +276,40 @@ static void devfs_remove_partitions(struct gendisk *dev)
 #endif
 }
 
+static ssize_t part_dev_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->parent->driver_data;
+	struct hd_struct *p = dev->driver_data;
+	int part = p - disk->part + 1;
+	dev_t base = MKDEV(disk->major, disk->first_minor); 
+	return off ? 0 : sprintf(page, "%04x\n",base + part);
+}
+static ssize_t part_start_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct hd_struct *p = dev->driver_data;
+	return off ? 0 : sprintf(page, "%llu\n",(u64)p->start_sect);
+}
+static ssize_t part_size_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct hd_struct *p = dev->driver_data;
+	return off ? 0 : sprintf(page, "%llu\n",(u64)p->nr_sects);
+}
+static struct device_attribute part_attr_dev = {
+	.attr = {.name = "dev", .mode = S_IRUGO },
+	.show	= part_dev_read
+};
+static struct device_attribute part_attr_start = {
+	.attr = {.name = "start", .mode = S_IRUGO },
+	.show	= part_start_read
+};
+static struct device_attribute part_attr_size = {
+	.attr = {.name = "size", .mode = S_IRUGO },
+	.show	= part_size_read
+};
+
 void delete_partition(struct gendisk *disk, int part)
 {
 	struct hd_struct *p = disk->part + part - 1;
@@ -338,8 +322,9 @@ void delete_partition(struct gendisk *disk, int part)
 	dev = p->hd_driverfs_dev;
 	p->hd_driverfs_dev = NULL;
 	if (dev) {
-		device_remove_file(dev, &dev_attr_type);
-		device_remove_file(dev, &dev_attr_kdev);
+		device_remove_file(dev, &part_attr_size);
+		device_remove_file(dev, &part_attr_start);
+		device_remove_file(dev, &part_attr_dev);
 		device_unregister(dev);	
 	}
 }
@@ -352,43 +337,130 @@ static void part_release(struct device *dev)
 void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
 {
 	struct hd_struct *p = disk->part + part - 1;
-	struct device *parent = disk->disk_dev.parent;
+	struct device *parent = &disk->disk_dev;
 	struct device *dev;
 
 	p->start_sect = start;
 	p->nr_sects = len;
 	devfs_register_partition(disk, part);
-	if (!(disk->flags & GENHD_FL_DRIVERFS))
-		return;
 	dev = kmalloc(sizeof(struct device), GFP_KERNEL);
 	if (!dev)
 		return;
 	memset(dev, 0, sizeof(struct device));
-	if (parent)  {
-		sprintf(dev->name, "%spart%d", parent->name, part);
-		sprintf(dev->bus_id, "%s:p%d", parent->bus_id, part);
-		dev->parent = parent;
-		dev->bus = parent->bus;
-	} else {
-		sprintf(dev->name, "part%d", part);
-		sprintf(dev->bus_id, "p%d", part);
-	}
+	dev->parent = parent;
+	sprintf(dev->bus_id, "p%d", part);
 	dev->release = part_release;
-	dev->driver_data =
-		(void *)(long)__mkdev(disk->major, disk->first_minor+part);
+	dev->driver_data = p;
 	device_register(dev);
-	device_create_file(dev, &dev_attr_type);
-	device_create_file(dev, &dev_attr_kdev);
+	device_create_file(dev, &part_attr_dev);
+	device_create_file(dev, &part_attr_start);
+	device_create_file(dev, &part_attr_size);
 	p->hd_driverfs_dev = dev;
 }
 
+static ssize_t disk_dev_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->driver_data;
+	dev_t base = MKDEV(disk->major, disk->first_minor); 
+	return off ? 0 : sprintf(page, "%04x\n",base);
+}
+static ssize_t disk_range_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->driver_data;
+	return off ? 0 : sprintf(page, "%d\n",disk->minors);
+}
+static ssize_t disk_size_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->driver_data;
+	return off ? 0 : sprintf(page, "%llu\n",(u64)get_capacity(disk));
+}
+static struct device_attribute disk_attr_dev = {
+	.attr = {.name = "dev", .mode = S_IRUGO },
+	.show	= disk_dev_read
+};
+static struct device_attribute disk_attr_range = {
+	.attr = {.name = "range", .mode = S_IRUGO },
+	.show	= disk_range_read
+};
+static struct device_attribute disk_attr_size = {
+	.attr = {.name = "size", .mode = S_IRUGO },
+	.show	= disk_size_read
+};
+
+static void disk_driverfs_symlinks(struct gendisk *disk)
+{
+	struct device *target = disk->driverfs_dev;
+	struct device *dev = &disk->disk_dev;
+	struct device *p;
+	char *path;
+	char *s;
+	int length;
+	int depth;
+
+	if (!target)
+		return;
+
+	get_device(target);
+
+	length = get_devpath_length(target);
+	length += strlen("..");
+
+	if (length > PATH_MAX)
+		return;
+
+	if (!(path = kmalloc(length,GFP_KERNEL)))
+		return;
+	memset(path,0,length);
+
+	/* our relative position */
+	strcpy(path,"..");
+
+	fill_devpath(target, path, length);
+	driverfs_create_symlink(&dev->dir, "device", path);
+	kfree(path);
+
+	for (p = target, depth = 0; p; p = p->parent, depth++)
+		;
+	length = get_devpath_length(dev);
+	length += 3 * depth - 1;
+
+	if (length > PATH_MAX)
+		return;
+
+	if (!(path = kmalloc(length,GFP_KERNEL)))
+		return;
+	memset(path,0,length);
+	for (s = path; depth--; s += 3)
+		strcpy(s, "../");
+
+	fill_devpath(dev, path, length);
+	driverfs_create_symlink(&target->dir, "block", path);
+	kfree(path);
+}
+
 /* Not exported, helper to add_disk(). */
 void register_disk(struct gendisk *disk)
 {
+	struct device *dev = &disk->disk_dev;
 	struct parsed_partitions *state;
 	struct block_device *bdev;
+	char *s;
 	int j;
 
+	strcpy(dev->bus_id, disk->disk_name);
+	/* ewww... some of these buggers have / in name... */
+	s = strchr(dev->bus_id, '/');
+	if (s)
+		*s = '!';
+	device_add(dev);
+	device_create_file(dev, &disk_attr_dev);
+	device_create_file(dev, &disk_attr_range);
+	device_create_file(dev, &disk_attr_size);
+	disk_driverfs_symlinks(disk);
+
 	if (disk->flags & GENHD_FL_CD)
 		devfs_create_cdrom(disk);
 
@@ -404,7 +476,6 @@ void register_disk(struct gendisk *disk)
 	if (blkdev_get(bdev, FMODE_READ, 0, BDEV_RAW) < 0)
 		return;
 	state = check_partition(disk, bdev);
-	driverfs_create_partitions(disk);
 	devfs_create_partitions(disk);
 	if (state) {
 		for (j = 1; j < state->limit; j++) {
@@ -499,8 +570,16 @@ void del_gendisk(struct gendisk *disk)
 	disk->capacity = 0;
 	disk->flags &= ~GENHD_FL_UP;
 	unlink_gendisk(disk);
-	driverfs_remove_partitions(disk);
 	devfs_remove_partitions(disk);
+	device_remove_file(&disk->disk_dev, &disk_attr_dev);
+	device_remove_file(&disk->disk_dev, &disk_attr_range);
+	device_remove_file(&disk->disk_dev, &disk_attr_size);
+	driverfs_remove_file(&disk->disk_dev.dir, "device");
+	if (disk->driverfs_dev) {
+		driverfs_remove_file(&disk->driverfs_dev->dir, "block");
+		put_device(disk->driverfs_dev);
+	}
+	device_del(&disk->disk_dev);
 }
 
 struct dev_name {
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index b287b7a24b11..4387203c95b7 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -730,7 +730,6 @@ struct cdrom_device_info {
 	struct cdrom_device_ops  *ops;  /* link to device_ops */
 	struct cdrom_device_info *next; /* next device_info for this major */
 	void *handle;		        /* driver-dependent data */
-	struct device cdrom_driverfs_dev; /* driverfs implementation */
 /* specifications */
         kdev_t dev;	                /* device number */
 	int mask;                       /* mask of capability: disables them */
-- 
cgit v1.2.3


From 68c16870dcfaba7c9e2dd5055a2caf4edcf42e87 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:25:32 -0700
Subject: [PATCH] refcounts for gendisks

Finally.  We use disk->disk_dev.refcount as a gendisk refcount.  New helper -
get_disk(): atomic_inc on refcount.  get_gendisk() does it on return;
callers of get_gendisk() do put_disk() when they are done.
---
 drivers/block/genhd.c | 10 ++++++++++
 drivers/block/ioctl.c | 47 +++++++++++++++++++++++++++++++++++++----------
 fs/block_dev.c        | 23 ++++++++++++++++++-----
 fs/partitions/check.c |  1 +
 include/linux/genhd.h |  7 +++++--
 5 files changed, 71 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index ecb2dcdf214d..1cc4655c04c9 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -100,6 +100,8 @@ get_gendisk(dev_t dev, int *part)
 	read_lock(&gendisk_lock);
 	if (gendisks[major].get) {
 		disk = gendisks[major].get(minor);
+		if (disk)
+			get_disk(disk);
 		read_unlock(&gendisk_lock);
 		return disk;
 	}
@@ -109,6 +111,7 @@ get_gendisk(dev_t dev, int *part)
 			continue;
 		if (disk->first_minor + disk->minors <= minor)
 			continue;
+		get_disk(disk);
 		read_unlock(&gendisk_lock);
 		*part = minor - disk->first_minor;
 		return disk;
@@ -244,6 +247,12 @@ struct gendisk *alloc_disk(int minors)
 	return disk;
 }
 
+struct gendisk *get_disk(struct gendisk *disk)
+{
+	atomic_inc(&disk->disk_dev.refcount);
+	return disk;
+}
+
 void put_disk(struct gendisk *disk)
 {
 	if (disk)
@@ -251,4 +260,5 @@ void put_disk(struct gendisk *disk)
 }
 
 EXPORT_SYMBOL(alloc_disk);
+EXPORT_SYMBOL(get_disk);
 EXPORT_SYMBOL(put_disk);
diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
index 4af05bc32db2..de2da2b44cad 100644
--- a/drivers/block/ioctl.c
+++ b/drivers/block/ioctl.c
@@ -25,13 +25,17 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 	disk = get_gendisk(bdev->bd_dev, &part);
 	if (!disk)
 		return -ENXIO;
-	if (bdev != bdev->bd_contains)
+	if (bdev != bdev->bd_contains) {
+		put_disk(disk);
 		return -EINVAL;
+	}
 	if (part)
 		BUG();
 	part = p.pno;
-	if (part <= 0 || part >= disk->minors)
+	if (part <= 0 || part >= disk->minors) {
+		put_disk(disk);
 		return -EINVAL;
+	}
 
 	switch (a.op) {
 		case BLKPG_ADD_PARTITION:
@@ -42,34 +46,46 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 			    sizeof(long long) > sizeof(long)) {
 				long pstart = start, plength = length;
 				if (pstart != start || plength != length
-				    || pstart < 0 || plength < 0)
+				    || pstart < 0 || plength < 0) {
+					put_disk(disk);
 					return -EINVAL;
+				}
 			}
 
 			/* partition number in use? */
-			if (disk->part[part - 1].nr_sects != 0)
+			if (disk->part[part - 1].nr_sects != 0) {
+				put_disk(disk);
 				return -EBUSY;
+			}
 
 			/* overlap? */
 			for (i = 0; i < disk->minors - 1; i++) {
 				struct hd_struct *s = &disk->part[i];
 				if (!(start+length <= s->start_sect ||
-				      start >= s->start_sect + s->nr_sects))
+				      start >= s->start_sect + s->nr_sects)) {
+					put_disk(disk);
 					return -EBUSY;
+				}
 			}
 			/* all seems OK */
 			add_partition(disk, part, start, length);
+			put_disk(disk);
 			return 0;
 		case BLKPG_DEL_PARTITION:
-			if (disk->part[part - 1].nr_sects == 0)
+			if (disk->part[part - 1].nr_sects == 0) {
+				put_disk(disk);
 				return -ENXIO;
+			}
 
 			/* partition in use? Incomplete check for now. */
 			bdevp = bdget(MKDEV(disk->major, disk->first_minor) + part);
-			if (!bdevp)
+			if (!bdevp) {
+				put_disk(disk);
 				return -ENOMEM;
+			}
 			if (bd_claim(bdevp, &holder) < 0) {
 				bdput(bdevp);
+				put_disk(disk);
 				return -EBUSY;
 			}
 
@@ -80,8 +96,10 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 			delete_partition(disk, part);
 			bd_release(bdevp);
 			bdput(bdevp);
+			put_disk(disk);
 			return 0;
 		default:
+			put_disk(disk);
 			return -EINVAL;
 	}
 }
@@ -92,16 +110,25 @@ static int blkdev_reread_part(struct block_device *bdev)
 	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
 	int res = 0;
 
-	if (!disk || disk->minors == 1 || bdev != bdev->bd_contains)
+	if (!disk)
 		return -EINVAL;
+	if (disk->minors == 1 || bdev != bdev->bd_contains) {
+		put_disk(disk);
+		return -EINVAL;
+	}
 	if (part)
 		BUG();
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_SYS_ADMIN)) {
+		put_disk(disk);
 		return -EACCES;
-	if (down_trylock(&bdev->bd_sem))
+	}
+	if (down_trylock(&bdev->bd_sem)) {
+		put_disk(disk);
 		return -EBUSY;
+	}
 	res = rescan_partitions(disk, bdev);
 	up(&bdev->bd_sem);
+	put_disk(disk);
 	return res;
 }
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dff0244e63a6..d029636b07e6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -542,6 +542,7 @@ int check_disk_change(struct block_device *bdev)
 		bdops->revalidate(dev);
 	if (disk && disk->minors > 1)
 		bdev->bd_invalidated = 1;
+	put_disk(disk);
 	return 1;
 }
 
@@ -553,7 +554,9 @@ int full_check_disk_change(struct block_device *bdev)
 		BUG();
 	down(&bdev->bd_sem);
 	if (check_disk_change(bdev)) {
-		rescan_partitions(get_gendisk(bdev->bd_dev, &n), bdev);
+		struct gendisk *disk = get_gendisk(bdev->bd_dev, &n);
+		rescan_partitions(disk, bdev);
+		put_disk(disk);
 		res = 1;
 	}
 	up(&bdev->bd_sem);
@@ -622,13 +625,18 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 			struct block_device *disk;
 			disk = bdget(MKDEV(g->major, g->first_minor));
 			ret = -ENOMEM;
-			if (!disk)
+			if (!disk) {
+				put_disk(g);
 				goto out1;
+			}
 			ret = blkdev_get(disk, file->f_mode, file->f_flags, BDEV_RAW);
-			if (ret)
+			if (ret) {
+				put_disk(g);
 				goto out1;
+			}
 			bdev->bd_contains = disk;
 		}
+		put_disk(g);
 	}
 	if (bdev->bd_contains == bdev) {
 		int part;
@@ -643,8 +651,10 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 
 		if (bdev->bd_op->open) {
 			ret = bdev->bd_op->open(inode, file);
-			if (ret)
+			if (ret) {
+				put_disk(g);
 				goto out2;
+			}
 		}
 		if (!bdev->bd_openers) {
 			struct backing_dev_info *bdi;
@@ -662,6 +672,7 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 		}
 		if (bdev->bd_invalidated)
 			rescan_partitions(g, bdev);
+		put_disk(g);
 	} else {
 		down(&bdev->bd_contains->bd_sem);
 		bdev->bd_contains->bd_part_count++;
@@ -673,15 +684,17 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 			inode->i_data.backing_dev_info =
 			   bdev->bd_inode->i_data.backing_dev_info =
 			   bdev->bd_contains->bd_inode->i_data.backing_dev_info;
-			if (!p->nr_sects) {
+			if (!(g->flags & GENHD_FL_UP) || !p->nr_sects) {
 				bdev->bd_contains->bd_part_count--;
 				up(&bdev->bd_contains->bd_sem);
+				put_disk(g);
 				ret = -ENXIO;
 				goto out2;
 			}
 			bdev->bd_queue = bdev->bd_contains->bd_queue;
 			bdev->bd_offset = p->start_sect;
 			bd_set_size(bdev, (loff_t) p->nr_sects << 9);
+			put_disk(g);
 		}
 		up(&bdev->bd_contains->bd_sem);
 	}
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 5fc23d047567..e6ed1a443116 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -616,6 +616,7 @@ char *partition_name(dev_t dev)
 	dname->name = NULL;
 	if (hd)
 		dname->name = disk_name(hd, part, dname->namebuf);
+	put_disk(hd);
 	if (!dname->name) {
 		sprintf(dname->namebuf, "[dev %s]", kdevname(to_kdev_t(dev)));
 		dname->name = dname->namebuf;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6b859fad6a8a..030ee2f87891 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -266,6 +266,7 @@ extern void add_partition(struct gendisk *, int, sector_t, sector_t);
 extern void delete_partition(struct gendisk *, int);
 
 extern struct gendisk *alloc_disk(int minors);
+extern struct gendisk *get_disk(struct gendisk *disk);
 extern void put_disk(struct gendisk *disk);
 
 /* will go away */
@@ -273,9 +274,11 @@ extern void blk_set_probe(int major, struct gendisk *(p)(int));
 
 static inline unsigned int disk_index (kdev_t dev)
 {
-	int part;
+	int part, res;
 	struct gendisk *g = get_gendisk(kdev_t_to_nr(dev), &part);
-	return g ? (minor(dev) >> g->minor_shift) : 0;
+	res = g ? (minor(dev) >> g->minor_shift) : 0;
+	put_disk(g);
+	return res;
 }
 
 #endif
-- 
cgit v1.2.3


From 5682bcc620dbee99319997718c8929ec0d797854 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:25:37 -0700
Subject: [PATCH] bdev->bd_disk introduced

There we go - now we can put a reference to gendisk into block_device, which
we do in do_open().  Most of the callers of get_gendisk() are simply using
bdev->bd_disk now (and most of the put_disk() calls introduced in the previous
step disappear).  We also put that pointer into struct request - ->rq_disk.
That allows us to get rid of the disk_index() kludges in md.c (we simply count
relevant IO in the struct gendisk fields) and to kill the export of
get_gendisk().
	Notice that by now we can move _all_ IO counters into gendisk.  That
will kill a bunch of per-major arrays and, more importantly, allow us to merge
sard in a clean way.  FWIW, we probably could show them as disk/partitions
attributes in driverfs...
---
 drivers/block/genhd.c     | 10 ++-----
 drivers/block/ioctl.c     | 65 +++++++++---------------------------------
 drivers/block/ll_rw_blk.c | 15 +++++++++-
 drivers/block/rd.c        |  1 +
 drivers/md/md.c           | 23 ++-------------
 fs/block_dev.c            | 72 ++++++++++++++++++-----------------------------
 include/linux/blkdev.h    |  1 +
 include/linux/fs.h        |  1 +
 include/linux/genhd.h     | 13 +++------
 9 files changed, 68 insertions(+), 133 deletions(-)

(limited to 'fs')

diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index 1cc4655c04c9..449e69061bbc 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -61,10 +61,7 @@ void add_disk(struct gendisk *disk)
 {
 	write_lock(&gendisk_lock);
 	list_add(&disk->list, &gendisks[disk->major].list);
-	if (disk->minors > 1)
-		list_add_tail(&disk->full_list, &gendisk_list);
-	else
-		INIT_LIST_HEAD(&disk->full_list);
+	list_add_tail(&disk->full_list, &gendisk_list);
 	write_unlock(&gendisk_lock);
 	disk->flags |= GENHD_FL_UP;
 	register_disk(disk);
@@ -120,8 +117,6 @@ get_gendisk(dev_t dev, int *part)
 	return NULL;
 }
 
-EXPORT_SYMBOL(get_gendisk);
-
 #ifdef CONFIG_PROC_FS
 /* iterator */
 static void *part_start(struct seq_file *part, loff_t *pos)
@@ -158,7 +153,7 @@ static int show_partition(struct seq_file *part, void *v)
 		seq_puts(part, "major minor  #blocks  name\n\n");
 
 	/* Don't show non-partitionable devices or empty devices */
-	if (!get_capacity(sgp))
+	if (!get_capacity(sgp) || sgp->minors == 1)
 		return 0;
 
 	/* show the full disk and all non-0 size partitions of it */
@@ -239,6 +234,7 @@ struct gendisk *alloc_disk(int minors)
 		disk->minors = minors;
 		while (minors >>= 1)
 			disk->minor_shift++;
+		INIT_LIST_HEAD(&disk->full_list);
 		disk->disk_dev.bus = &disk_bus;
 		disk->disk_dev.release = disk_release;
 		disk->disk_dev.driver_data = disk;
diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
index de2da2b44cad..e420c691763d 100644
--- a/drivers/block/ioctl.c
+++ b/drivers/block/ioctl.c
@@ -22,21 +22,12 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 		return -EFAULT;
 	if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
 		return -EFAULT;
-	disk = get_gendisk(bdev->bd_dev, &part);
-	if (!disk)
-		return -ENXIO;
-	if (bdev != bdev->bd_contains) {
-		put_disk(disk);
+	disk = bdev->bd_disk;
+	if (bdev != bdev->bd_contains)
 		return -EINVAL;
-	}
-	if (part)
-		BUG();
 	part = p.pno;
-	if (part <= 0 || part >= disk->minors) {
-		put_disk(disk);
+	if (part <= 0 || part >= disk->minors)
 		return -EINVAL;
-	}
-
 	switch (a.op) {
 		case BLKPG_ADD_PARTITION:
 			start = p.start >> 9;
@@ -46,49 +37,33 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 			    sizeof(long long) > sizeof(long)) {
 				long pstart = start, plength = length;
 				if (pstart != start || plength != length
-				    || pstart < 0 || plength < 0) {
-					put_disk(disk);
+				    || pstart < 0 || plength < 0)
 					return -EINVAL;
-				}
 			}
-
 			/* partition number in use? */
-			if (disk->part[part - 1].nr_sects != 0) {
-				put_disk(disk);
+			if (disk->part[part - 1].nr_sects != 0)
 				return -EBUSY;
-			}
-
 			/* overlap? */
 			for (i = 0; i < disk->minors - 1; i++) {
 				struct hd_struct *s = &disk->part[i];
 				if (!(start+length <= s->start_sect ||
-				      start >= s->start_sect + s->nr_sects)) {
-					put_disk(disk);
+				      start >= s->start_sect + s->nr_sects))
 					return -EBUSY;
-				}
 			}
 			/* all seems OK */
 			add_partition(disk, part, start, length);
-			put_disk(disk);
 			return 0;
 		case BLKPG_DEL_PARTITION:
-			if (disk->part[part - 1].nr_sects == 0) {
-				put_disk(disk);
+			if (disk->part[part - 1].nr_sects == 0)
 				return -ENXIO;
-			}
-
 			/* partition in use? Incomplete check for now. */
 			bdevp = bdget(MKDEV(disk->major, disk->first_minor) + part);
-			if (!bdevp) {
-				put_disk(disk);
+			if (!bdevp)
 				return -ENOMEM;
-			}
 			if (bd_claim(bdevp, &holder) < 0) {
 				bdput(bdevp);
-				put_disk(disk);
 				return -EBUSY;
 			}
-
 			/* all seems OK */
 			fsync_bdev(bdevp);
 			invalidate_bdev(bdevp, 0);
@@ -96,39 +71,25 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 			delete_partition(disk, part);
 			bd_release(bdevp);
 			bdput(bdevp);
-			put_disk(disk);
 			return 0;
 		default:
-			put_disk(disk);
 			return -EINVAL;
 	}
 }
 
 static int blkdev_reread_part(struct block_device *bdev)
 {
-	int part;
-	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
-	int res = 0;
+	struct gendisk *disk = bdev->bd_disk;
+	int res;
 
-	if (!disk)
-		return -EINVAL;
-	if (disk->minors == 1 || bdev != bdev->bd_contains) {
-		put_disk(disk);
+	if (disk->minors == 1 || bdev != bdev->bd_contains)
 		return -EINVAL;
-	}
-	if (part)
-		BUG();
-	if (!capable(CAP_SYS_ADMIN)) {
-		put_disk(disk);
+	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
-	}
-	if (down_trylock(&bdev->bd_sem)) {
-		put_disk(disk);
+	if (down_trylock(&bdev->bd_sem))
 		return -EBUSY;
-	}
 	res = rescan_partitions(disk, bdev);
 	up(&bdev->bd_sem);
-	put_disk(disk);
 	return res;
 }
 
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index ea56c1d8456c..eb877e50a8d1 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -1427,7 +1427,19 @@ void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
 	int rw = rq_data_dir(rq);
 	unsigned int index;
 
-	index = disk_index(rq->rq_dev);
+	if (!rq->rq_disk)
+		return;
+
+	if (rw == READ) {
+		rq->rq_disk->rio += new_io;
+		rq->rq_disk->reads += nr_sectors;
+	} else if (rw == WRITE) {
+		rq->rq_disk->wio += new_io;
+		rq->rq_disk->writes += nr_sectors;
+	}
+
+	index = rq->rq_disk->first_minor >> rq->rq_disk->minor_shift;
+
 	if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
 		return;
 
@@ -1747,6 +1759,7 @@ get_rq:
 	req->waiting = NULL;
 	req->bio = req->biotail = bio;
 	req->rq_dev = to_kdev_t(bio->bi_bdev->bd_dev);
+	req->rq_disk = bio->bi_bdev->bd_disk;
 	add_request(q, req, insert_here);
 out:
 	if (freereq)
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 7d72b786080c..bbd247fa29dc 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -381,6 +381,7 @@ static int rd_open(struct inode * inode, struct file * filp)
 		rd_bdev[unit]->bd_inode->i_mapping->a_ops = &ramdisk_aops;
 		rd_bdev[unit]->bd_inode->i_size = rd_length[unit];
 		rd_bdev[unit]->bd_queue = &blk_dev[MAJOR_NR].request_queue;
+		rd_bdev[unit]->bd_disk = get_disk(rd_disks[unit]);
 	}
 
 	return 0;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 205bb0fdeee0..784e3b69213e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2731,18 +2731,9 @@ int unregister_md_personality(int pnum)
 	return 0;
 }
 
-static unsigned int sync_io[DK_MAX_MAJOR][DK_MAX_DISK];
 void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors)
 {
-	kdev_t dev = to_kdev_t(rdev->bdev->bd_dev);
-	unsigned int major = major(dev);
-	unsigned int index;
-
-	index = disk_index(dev);
-	if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
-		return;
-
-	sync_io[major][index] += nr_sectors;
+	rdev->bdev->bd_disk->sync_io += nr_sectors;
 }
 
 static int is_mddev_idle(mddev_t *mddev)
@@ -2754,16 +2745,8 @@ static int is_mddev_idle(mddev_t *mddev)
 
 	idle = 1;
 	ITERATE_RDEV(mddev,rdev,tmp) {
-		kdev_t dev = to_kdev_t(rdev->bdev->bd_dev);
-		int major = major(dev);
-		int idx = disk_index(dev);
-
-		if ((idx >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
-			continue;
-
-		curr_events = kstat.dk_drive_rblk[major][idx] +
-						kstat.dk_drive_wblk[major][idx] ;
-		curr_events -= sync_io[major][idx];
+		struct gendisk *disk = rdev->bdev->bd_disk;
+		curr_events = disk->reads + disk->writes - disk->sync_io;
 		if ((curr_events - rdev->last_events) > 32) {
 			rdev->last_events = curr_events;
 			idle = 0;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d029636b07e6..1ad7f467993b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -526,8 +526,6 @@ int check_disk_change(struct block_device *bdev)
 {
 	struct block_device_operations * bdops = bdev->bd_op;
 	kdev_t dev = to_kdev_t(bdev->bd_dev);
-	struct gendisk *disk;
-	int part;
 
 	if (bdops->check_media_change == NULL)
 		return 0;
@@ -537,26 +535,21 @@ int check_disk_change(struct block_device *bdev)
 	if (invalidate_device(dev, 0))
 		printk("VFS: busy inodes on changed media.\n");
 
-	disk = get_gendisk(bdev->bd_dev, &part);
 	if (bdops->revalidate)
 		bdops->revalidate(dev);
-	if (disk && disk->minors > 1)
+	if (bdev->bd_disk->minors > 1)
 		bdev->bd_invalidated = 1;
-	put_disk(disk);
 	return 1;
 }
 
 int full_check_disk_change(struct block_device *bdev)
 {
 	int res = 0;
-	int n;
 	if (bdev->bd_contains != bdev)
 		BUG();
 	down(&bdev->bd_sem);
 	if (check_disk_change(bdev)) {
-		struct gendisk *disk = get_gendisk(bdev->bd_dev, &n);
-		rescan_partitions(disk, bdev);
-		put_disk(disk);
+		rescan_partitions(bdev->bd_disk, bdev);
 		res = 1;
 	}
 	up(&bdev->bd_sem);
@@ -598,6 +591,8 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 	kdev_t dev = to_kdev_t(bdev->bd_dev);
 	struct module *owner = NULL;
 	struct block_device_operations *ops, *old;
+	struct gendisk *disk;
+	int part;
 
 	lock_kernel();
 	ops = get_blkfops(major(dev));
@@ -617,53 +612,41 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 		if (owner)
 			__MOD_DEC_USE_COUNT(owner);
 	}
+	disk = get_gendisk(bdev->bd_dev, &part);
+	if (!disk)
+		goto out1;
 	if (!bdev->bd_contains) {
-		int part;
-		struct gendisk *g = get_gendisk(bdev->bd_dev, &part);
 		bdev->bd_contains = bdev;
-		if (g && part) {
-			struct block_device *disk;
-			disk = bdget(MKDEV(g->major, g->first_minor));
+		if (part) {
+			struct block_device *whole;
+			whole = bdget(MKDEV(disk->major, disk->first_minor));
 			ret = -ENOMEM;
-			if (!disk) {
-				put_disk(g);
+			if (!whole)
 				goto out1;
-			}
-			ret = blkdev_get(disk, file->f_mode, file->f_flags, BDEV_RAW);
-			if (ret) {
-				put_disk(g);
+			ret = blkdev_get(whole, file->f_mode, file->f_flags, BDEV_RAW);
+			if (ret)
 				goto out1;
-			}
-			bdev->bd_contains = disk;
+			bdev->bd_contains = whole;
 		}
-		put_disk(g);
 	}
 	if (bdev->bd_contains == bdev) {
-		int part;
-		struct gendisk *g = get_gendisk(bdev->bd_dev, &part);
-
+		if (!bdev->bd_openers)
+			bdev->bd_disk = disk;
 		if (!bdev->bd_queue) {
 			struct blk_dev_struct *p = blk_dev + major(dev);
 			bdev->bd_queue = &p->request_queue;
 			if (p->queue)
 				bdev->bd_queue =  p->queue(dev);
 		}
-
 		if (bdev->bd_op->open) {
 			ret = bdev->bd_op->open(inode, file);
-			if (ret) {
-				put_disk(g);
+			if (ret)
 				goto out2;
-			}
 		}
 		if (!bdev->bd_openers) {
 			struct backing_dev_info *bdi;
-			sector_t sect = 0;
-
 			bdev->bd_offset = 0;
-			if (g)
-				sect = get_capacity(g);
-			bd_set_size(bdev, (loff_t)sect << 9);
+			bd_set_size(bdev, (loff_t)get_capacity(disk) << 9);
 			bdi = blk_get_backing_dev_info(bdev);
 			if (bdi == NULL)
 				bdi = &default_backing_dev_info;
@@ -671,34 +654,31 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 			bdev->bd_inode->i_data.backing_dev_info = bdi;
 		}
 		if (bdev->bd_invalidated)
-			rescan_partitions(g, bdev);
-		put_disk(g);
+			rescan_partitions(disk, bdev);
 	} else {
 		down(&bdev->bd_contains->bd_sem);
 		bdev->bd_contains->bd_part_count++;
 		if (!bdev->bd_openers) {
-			int part;
-			struct gendisk *g = get_gendisk(bdev->bd_dev, &part);
 			struct hd_struct *p;
-			p = g->part + part - 1;
+			p = disk->part + part - 1;
 			inode->i_data.backing_dev_info =
 			   bdev->bd_inode->i_data.backing_dev_info =
 			   bdev->bd_contains->bd_inode->i_data.backing_dev_info;
-			if (!(g->flags & GENHD_FL_UP) || !p->nr_sects) {
+			if (!(disk->flags & GENHD_FL_UP) || !p->nr_sects) {
 				bdev->bd_contains->bd_part_count--;
 				up(&bdev->bd_contains->bd_sem);
-				put_disk(g);
 				ret = -ENXIO;
 				goto out2;
 			}
 			bdev->bd_queue = bdev->bd_contains->bd_queue;
 			bdev->bd_offset = p->start_sect;
 			bd_set_size(bdev, (loff_t) p->nr_sects << 9);
-			put_disk(g);
+			bdev->bd_disk = disk;
 		}
 		up(&bdev->bd_contains->bd_sem);
 	}
-	bdev->bd_openers++;
+	if (bdev->bd_openers++)
+		put_disk(disk);
 	up(&bdev->bd_sem);
 	unlock_kernel();
 	return 0;
@@ -712,6 +692,7 @@ out2:
 		}
 	}
 out1:
+	put_disk(disk);
 	if (!old) {
 		bdev->bd_op = NULL;
 		if (owner)
@@ -785,15 +766,18 @@ int blkdev_put(struct block_device *bdev, int kind)
 		up(&bdev->bd_contains->bd_sem);
 	}
 	if (!bdev->bd_openers) {
+		struct gendisk *disk = bdev->bd_disk;
 		if (bdev->bd_op->owner)
 			__MOD_DEC_USE_COUNT(bdev->bd_op->owner);
 		bdev->bd_op = NULL;
 		bdev->bd_queue = NULL;
+		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 		if (bdev != bdev->bd_contains) {
 			blkdev_put(bdev->bd_contains, BDEV_RAW);
 			bdev->bd_contains = NULL;
 		}
+		put_disk(disk);
 	}
 	unlock_kernel();
 	up(&bdev->bd_sem);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 607641c6cfb1..ccb56d58de6a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -34,6 +34,7 @@ struct request {
 
 	int rq_status;	/* should split this into a few status bits */
 	kdev_t rq_dev;
+	struct gendisk *rq_disk;
 	int errors;
 	sector_t sector;
 	unsigned long nr_sectors;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cac13f931cec..bca164f4265a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -359,6 +359,7 @@ struct block_device {
 	sector_t		bd_offset;
 	unsigned		bd_part_count;
 	int			bd_invalidated;
+	struct gendisk *	bd_disk;
 };
 
 struct inode {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 030ee2f87891..9de2f51ae935 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -90,6 +90,10 @@ struct gendisk {
 	devfs_handle_t disk_de;		/* piled higher and deeper */
 	struct device *driverfs_dev;
 	struct device disk_dev;
+
+	unsigned sync_io;		/* RAID */
+	unsigned reads, writes;
+	unsigned rio, wio;
 };
 
 /* drivers/block/genhd.c */
@@ -272,15 +276,6 @@ extern void put_disk(struct gendisk *disk);
 /* will go away */
 extern void blk_set_probe(int major, struct gendisk *(p)(int));
 
-static inline unsigned int disk_index (kdev_t dev)
-{
-	int part, res;
-	struct gendisk *g = get_gendisk(kdev_t_to_nr(dev), &part);
-	res = g ? (minor(dev) >> g->minor_shift) : 0;
-	put_disk(g);
-	return res;
-}
-
 #endif
 
 #endif
-- 
cgit v1.2.3


From 7e1aee05c99cfbb7e5cf33bae11ab9fa8df6c57c Mon Sep 17 00:00:00 2001
From: John Levon <levon@movementarian.org>
Date: Tue, 15 Oct 2002 04:30:32 -0700
Subject: [PATCH] oprofile - dcookies

This implements the persistent path-to-dcookies mapping, and adds a
system call for the user-space profiler to look up the profile data, so
it can tag profiles to specific binaries.
---
 arch/i386/kernel/entry.S  |   1 +
 fs/Makefile               |   4 +-
 fs/dcache.c               |   1 +
 fs/dcookies.c             | 323 ++++++++++++++++++++++++++++++++++++++++++++++
 include/asm-i386/unistd.h |   2 +
 include/linux/dcache.h    |   3 +
 include/linux/dcookies.h  |  69 ++++++++++
 kernel/sys.c              |   2 +
 8 files changed, 404 insertions(+), 1 deletion(-)
 create mode 100644 fs/dcookies.c
 create mode 100644 include/linux/dcookies.h

(limited to 'fs')

diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 557b684431c5..e873703e0c34 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -736,6 +736,7 @@ ENTRY(sys_call_table)
 	.long sys_alloc_hugepages /* 250 */
 	.long sys_free_hugepages
 	.long sys_exit_group
+	.long sys_lookup_dcookie
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
diff --git a/fs/Makefile b/fs/Makefile
index d902bdd8bda3..a4320cf860ac 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -6,7 +6,7 @@
 # 
 
 export-objs :=	open.o dcache.o buffer.o bio.o inode.o dquot.o mpage.o aio.o \
-                fcntl.o read_write.o
+                fcntl.o read_write.o dcookies.o
 
 obj-y :=	open.o read_write.o devices.o file_table.o buffer.o \
 		bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \
@@ -40,6 +40,8 @@ obj-y				+= partitions/
 obj-y				+= driverfs/
 obj-y				+= devpts/
 
+obj-$(CONFIG_PROFILING)		+= dcookies.o
+ 
 # Do not add any filesystems before this line
 obj-$(CONFIG_EXT3_FS)		+= ext3/ # Before ext2 so root fs can be ext3
 obj-$(CONFIG_JBD)		+= jbd/
diff --git a/fs/dcache.c b/fs/dcache.c
index ef0871dbcdb2..d0fcfeba16ee 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -637,6 +637,7 @@ struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
 	dentry->d_mounted = 0;
+	dentry->d_cookie = NULL;
 	INIT_LIST_HEAD(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
diff --git a/fs/dcookies.c b/fs/dcookies.c
new file mode 100644
index 000000000000..0236c146b451
--- /dev/null
+++ b/fs/dcookies.c
@@ -0,0 +1,323 @@
+/*
+ * dcookies.c
+ *
+ * Copyright 2002 John Levon <levon@movementarian.org>
+ *
+ * Persistent cookie-path mappings. These are used by
+ * profilers to convert a per-task EIP value into something
+ * non-transitory that can be processed at a later date.
+ * This is done by locking the dentry/vfsmnt pair in the
+ * kernel until released by the tasks needing the persistent
+ * objects. The tag is simply an unsigned long that refers
+ * to the pair and can be looked up from userspace.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/dcache.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/dcookies.h>
+#include <asm/uaccess.h>
+
+/* The dcookies are allocated from a kmem_cache and
+ * hashed onto a small number of lists. None of the
+ * code here is particularly performance critical
+ */
+struct dcookie_struct {
+	struct dentry * dentry;
+	struct vfsmount * vfsmnt;
+	struct list_head hash_list;
+};
+
+static LIST_HEAD(dcookie_users);
+static DECLARE_MUTEX(dcookie_sem);
+static kmem_cache_t * dcookie_cache;
+static struct list_head * dcookie_hashtable;
+static size_t hash_size;
+
+static inline int is_live(void)
+{
+	return !(list_empty(&dcookie_users));
+}
+
+
+/* The dentry is locked, its address will do for the cookie */
+static inline unsigned long dcookie_value(struct dcookie_struct * dcs)
+{
+	return (unsigned long)dcs->dentry;
+}
+
+
+static size_t dcookie_hash(unsigned long dcookie)
+{
+	return (dcookie >> 2) & (hash_size - 1);
+}
+
+
+static struct dcookie_struct * find_dcookie(unsigned long dcookie)
+{
+	struct dcookie_struct * found = 0;
+	struct dcookie_struct * dcs;
+	struct list_head * pos;
+	struct list_head * list;
+
+	list = dcookie_hashtable + dcookie_hash(dcookie);
+
+	list_for_each(pos, list) {
+		dcs = list_entry(pos, struct dcookie_struct, hash_list);
+		if (dcookie_value(dcs) == dcookie) {
+			found = dcs;
+			break;
+		}
+	}
+
+	return found;
+}
+
+
+static void hash_dcookie(struct dcookie_struct * dcs)
+{
+	struct list_head * list = dcookie_hashtable + dcookie_hash(dcookie_value(dcs));
+	list_add(&dcs->hash_list, list);
+}
+
+
+static struct dcookie_struct * alloc_dcookie(struct dentry * dentry,
+	struct vfsmount * vfsmnt)
+{
+	struct dcookie_struct * dcs = kmem_cache_alloc(dcookie_cache, GFP_KERNEL);
+	if (!dcs)
+		return NULL;
+
+	atomic_inc(&dentry->d_count);
+	atomic_inc(&vfsmnt->mnt_count);
+	dentry->d_cookie = dcs;
+
+	dcs->dentry = dentry;
+	dcs->vfsmnt = vfsmnt;
+	hash_dcookie(dcs);
+
+	return dcs;
+}
+
+
+/* This is the main kernel-side routine that retrieves the cookie
+ * value for a dentry/vfsmnt pair.
+ */
+int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
+	unsigned long * cookie)
+{
+	int err = 0;
+	struct dcookie_struct * dcs;
+
+	down(&dcookie_sem);
+
+	if (!is_live()) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	dcs = dentry->d_cookie;
+
+	if (!dcs)
+		dcs = alloc_dcookie(dentry, vfsmnt);
+
+	if (!dcs) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	*cookie = dcookie_value(dcs);
+
+out:
+	up(&dcookie_sem);
+	return err;
+}
+
+
+/* And here is where the userspace process can look up the cookie value
+ * to retrieve the path.
+ */
+asmlinkage int sys_lookup_dcookie(unsigned long cookie, char * buf, size_t len)
+{
+	char * kbuf;
+	char * path;
+	int err = -EINVAL;
+	size_t pathlen;
+	struct dcookie_struct * dcs;
+
+	/* we could leak path information to users
+	 * without dir read permission without this
+	 */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	down(&dcookie_sem);
+
+	if (!is_live()) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(dcs = find_dcookie(cookie)))
+		goto out;
+
+	err = -ENOMEM;
+	kbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!kbuf)
+		goto out;
+	memset(kbuf, 0, PAGE_SIZE);
+
+	/* FIXME: (deleted) ? */
+	path = d_path(dcs->dentry, dcs->vfsmnt, kbuf, PAGE_SIZE);
+
+	err = 0;
+
+	pathlen = kbuf + PAGE_SIZE - path;
+	if (len > pathlen)
+		len = pathlen;
+
+	if (copy_to_user(buf, path, len))
+		err = -EFAULT;
+
+	kfree(kbuf);
+out:
+	up(&dcookie_sem);
+	return err;
+}
+
+
+static int dcookie_init(void)
+{
+	struct list_head * d;
+	unsigned int i, hash_bits;
+	int err = -ENOMEM;
+
+	dcookie_cache = kmem_cache_create("dcookie_cache",
+		sizeof(struct dcookie_struct),
+		0, 0, NULL, NULL);
+
+	if (!dcookie_cache)
+		goto out;
+
+	dcookie_hashtable = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dcookie_hashtable)
+		goto out_kmem;
+
+	err = 0;
+
+	/*
+	 * Find the power-of-two list-heads that can fit into the allocation..
+	 * We don't guarantee that "sizeof(struct list_head)" is necessarily
+	 * a power-of-two.
+	 */
+	hash_size = PAGE_SIZE / sizeof(struct list_head);
+	hash_bits = 0;
+	do {
+		hash_bits++;
+	} while ((hash_size >> hash_bits) != 0);
+	hash_bits--;
+
+	/*
+	 * Re-calculate the actual number of entries and the mask
+	 * from the number of bits we can fit.
+	 */
+	hash_size = 1UL << hash_bits;
+
+	/* And initialize the newly allocated array */
+	d = dcookie_hashtable;
+	i = hash_size;
+	do {
+		INIT_LIST_HEAD(d);
+		d++;
+		i--;
+	} while (i);
+
+out:
+	return err;
+out_kmem:
+	kmem_cache_destroy(dcookie_cache);
+	goto out;
+}
+
+
+static void free_dcookie(struct dcookie_struct * dcs)
+{
+	dcs->dentry->d_cookie = NULL;
+	dput(dcs->dentry);
+	mntput(dcs->vfsmnt);
+	kmem_cache_free(dcookie_cache, dcs);
+}
+
+
+static void dcookie_exit(void)
+{
+	struct list_head * list;
+	struct list_head * pos;
+	struct list_head * pos2;
+	struct dcookie_struct * dcs;
+	size_t i;
+
+	for (i = 0; i < hash_size; ++i) {
+		list = dcookie_hashtable + i;
+		list_for_each_safe(pos, pos2, list) {
+			dcs = list_entry(pos, struct dcookie_struct, hash_list);
+			list_del(&dcs->hash_list);
+			free_dcookie(dcs);
+		}
+	}
+
+	kfree(dcookie_hashtable);
+	kmem_cache_destroy(dcookie_cache);
+}
+
+
+struct dcookie_user {
+	struct list_head next;
+};
+ 
+struct dcookie_user * dcookie_register(void)
+{
+	struct dcookie_user * user;
+
+	down(&dcookie_sem);
+
+	user = kmalloc(sizeof(struct dcookie_user), GFP_KERNEL);
+	if (!user)
+		goto out;
+
+	if (!is_live() && dcookie_init())
+		goto out_free;
+
+	list_add(&user->next, &dcookie_users);
+
+out:
+	up(&dcookie_sem);
+	return user;
+out_free:
+	kfree(user);
+	user = NULL;
+	goto out;
+}
+
+
+void dcookie_unregister(struct dcookie_user * user)
+{
+	down(&dcookie_sem);
+
+	list_del(&user->next);
+	kfree(user);
+
+	if (!is_live())
+		dcookie_exit();
+
+	up(&dcookie_sem);
+}
+
+EXPORT_SYMBOL_GPL(dcookie_register);
+EXPORT_SYMBOL_GPL(dcookie_unregister);
+EXPORT_SYMBOL_GPL(get_dcookie);
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index 8765a0f82aff..159dfa7fefe1 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -257,6 +257,8 @@
 #define __NR_alloc_hugepages	250
 #define __NR_free_hugepages	251
 #define __NR_exit_group		252
+#define __NR_lookup_dcookie	253
+  
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 71708edafce9..76a5085043e1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -66,6 +66,8 @@ static __inline__ unsigned int full_name_hash(const unsigned char * name, unsign
 
 #define DNAME_INLINE_LEN 16
 
+struct dcookie_struct;
+ 
 struct dentry {
 	atomic_t d_count;
 	unsigned int d_flags;
@@ -84,6 +86,7 @@ struct dentry {
 	unsigned long d_vfs_flags;
 	void * d_fsdata;		/* fs-specific data */
 	unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
+	struct dcookie_struct * d_cookie; /* cookie, if any */
 };
 
 struct dentry_operations {
diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h
new file mode 100644
index 000000000000..b2ae9692dc05
--- /dev/null
+++ b/include/linux/dcookies.h
@@ -0,0 +1,69 @@
+/*
+ * dcookies.h
+ *
+ * Persistent cookie-path mappings
+ *
+ * Copyright 2002 John Levon <levon@movementarian.org>
+ */
+
+#ifndef DCOOKIES_H
+#define DCOOKIES_H
+ 
+#include <linux/config.h>
+
+#ifdef CONFIG_PROFILING
+ 
+#include <linux/types.h>
+ 
+struct dcookie_user;
+ 
+/**
+ * dcookie_register - register a user of dcookies
+ *
+ * Register as a dcookie user. Returns %NULL on failure.
+ */
+struct dcookie_user * dcookie_register(void);
+
+/**
+ * dcookie_unregister - unregister a user of dcookies
+ *
+ * Unregister as a dcookie user. This may invalidate
+ * any dcookie values returned from get_dcookie().
+ */
+void dcookie_unregister(struct dcookie_user * user);
+  
+/**
+ * get_dcookie - acquire a dcookie
+ *
+ * Convert the given dentry/vfsmount pair into
+ * a cookie value.
+ *
+ * Returns -EINVAL if no living task has registered as a
+ * dcookie user.
+ *
+ * Returns 0 on success, with *cookie filled in
+ */
+int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
+	unsigned long * cookie);
+
+#else
+
+static inline struct dcookie_user * dcookie_register(void)
+{
+	return NULL;	/* !CONFIG_PROFILING stub: registration always fails */
+}
+
+static inline void dcookie_unregister(struct dcookie_user * user)
+{
+	return;	/* !CONFIG_PROFILING stub: nothing was registered, nothing to undo */
+}
+ 
+static inline int get_dcookie(struct dentry * dentry,
+	struct vfsmount * vfsmnt, unsigned long * cookie)
+{
+	return -ENOSYS;
+} 
+ 
+#endif /* CONFIG_PROFILING */
+ 
+#endif /* DCOOKIES_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 5b7e84384cfa..3c2992ac68f2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -20,6 +20,7 @@
 #include <linux/device.h>
 #include <linux/times.h>
 #include <linux/security.h>
+#include <linux/dcookies.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -202,6 +203,7 @@ asmlinkage long sys_ni_syscall(void)
 cond_syscall(sys_nfsservctl)
 cond_syscall(sys_quotactl)
 cond_syscall(sys_acct)
+cond_syscall(sys_lookup_dcookie)
 
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
-- 
cgit v1.2.3


From 120790b8fe2d901d99f459a567fefbb35c2d15e1 Mon Sep 17 00:00:00 2001
From: John Levon <levon@movementarian.org>
Date: Tue, 15 Oct 2002 04:30:38 -0700
Subject: [PATCH] oprofile - timer hook

This implements a simple hook into the profiling timer for x86 so that
non-perfctr machines can still use oprofile.  This has proven useful for
laptops and the like.

It also reduces header dependencies a bit by centralising readprofile
code.
---
 arch/i386/kernel/Makefile         |  1 +
 arch/i386/kernel/apic.c           | 12 ++--------
 arch/i386/kernel/i386_ksyms.c     |  3 +++
 arch/i386/kernel/profile.c        | 45 +++++++++++++++++++++++++++++++++++
 arch/i386/kernel/time.c           |  5 ----
 arch/i386/mach-generic/do_timer.h |  3 +--
 arch/i386/mach-visws/do_timer.h   |  3 +--
 fs/proc/proc_misc.c               |  1 +
 include/asm-i386/hw_irq.h         | 49 ++++++++++++++++++++++++++++++++-------
 include/linux/profile.h           | 11 +++++++++
 include/linux/sched.h             |  4 ----
 init/main.c                       | 20 ++--------------
 kernel/profile.c                  | 30 ++++++++++++++++++++++++
 kernel/timer.c                    |  4 ----
 14 files changed, 138 insertions(+), 53 deletions(-)
 create mode 100644 arch/i386/kernel/profile.c

(limited to 'fs')

diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index d201c60ac5c2..55f9312b7f39 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= suspend.o
 obj-$(CONFIG_X86_NUMAQ)		+= numaq.o
+obj-$(CONFIG_PROFILING)		+= profile.o
 
 EXTRA_AFLAGS   := -traditional
 
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index c2f56438f749..bff34a4d1dcf 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -1008,17 +1008,9 @@ int setup_profiling_timer(unsigned int multiplier)
 
 inline void smp_local_timer_interrupt(struct pt_regs * regs)
 {
-	int user = user_mode(regs);
 	int cpu = smp_processor_id();
 
-	/*
-	 * The profiling function is SMP safe. (nothing can mess
-	 * around with "current", and the profiling counters are
-	 * updated with atomic operations). This is especially
-	 * useful with a profiling multiplier != 1
-	 */
-	if (!user)
-		x86_do_profile(regs->eip);
+	x86_do_profile(regs);
 
 	if (--prof_counter[cpu] <= 0) {
 		/*
@@ -1036,7 +1028,7 @@ inline void smp_local_timer_interrupt(struct pt_regs * regs)
 		}
 
 #ifdef CONFIG_SMP
-		update_process_times(user);
+		update_process_times(user_mode(regs));
 #endif
 	}
 
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index 79c204a1f476..9314e0b9f880 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -167,6 +167,9 @@ EXPORT_SYMBOL(get_wchan);
 
 EXPORT_SYMBOL(rtc_lock);
 
+EXPORT_SYMBOL_GPL(register_profile_notifier);
+EXPORT_SYMBOL_GPL(unregister_profile_notifier);
+ 
 #undef memcpy
 #undef memset
 extern void * memset(void *,int,__kernel_size_t);
diff --git a/arch/i386/kernel/profile.c b/arch/i386/kernel/profile.c
new file mode 100644
index 000000000000..334af20585cb
--- /dev/null
+++ b/arch/i386/kernel/profile.c
@@ -0,0 +1,45 @@
+/*
+ *	linux/arch/i386/kernel/profile.c
+ *
+ *	(C) 2002 John Levon <levon@movementarian.org>
+ *
+ */
+
+#include <linux/profile.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/irq.h>
+#include <asm/hw_irq.h> 
+ 
+static struct notifier_block * profile_listeners;
+static rwlock_t profile_lock = RW_LOCK_UNLOCKED;
+/* Add nb to the profile listener chain while holding the write lock. */
+int register_profile_notifier(struct notifier_block * nb)
+{
+	int err;
+	write_lock_irq(&profile_lock);
+	err = notifier_chain_register(&profile_listeners, nb);
+	write_unlock_irq(&profile_lock);
+	return err;	/* result of notifier_chain_register() */
+}
+
+/* Remove nb from the profile listener chain while holding the write lock. */
+int unregister_profile_notifier(struct notifier_block * nb)
+{
+	int err;
+	write_lock_irq(&profile_lock);
+	err = notifier_chain_unregister(&profile_listeners, nb);
+	write_unlock_irq(&profile_lock);
+	return err;	/* result of notifier_chain_unregister() */
+}
+
+/* Run the profile listener chain on regs while holding the read lock. */
+void x86_profile_hook(struct pt_regs * regs)
+{
+	/* we would not even need this lock if
+	 * we had a global cli() on register/unregister
+	 */ 
+	read_lock(&profile_lock);
+	notifier_call_chain(&profile_listeners, 0, regs);
+	read_unlock(&profile_lock);
+}
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 4e3b4f1cb4b3..cf53d2c1d50a 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -64,11 +64,6 @@ extern spinlock_t i8259A_lock;
 
 #include "do_timer.h"
 
-/*
- * for x86_do_profile()
- */
-#include <linux/irq.h>
-
 u64 jiffies_64;
 
 unsigned long cpu_khz;	/* Detected as we calibrate the TSC */
diff --git a/arch/i386/mach-generic/do_timer.h b/arch/i386/mach-generic/do_timer.h
index 7ee964b2ebf2..4a24f8ad0635 100644
--- a/arch/i386/mach-generic/do_timer.h
+++ b/arch/i386/mach-generic/do_timer.h
@@ -20,8 +20,7 @@ static inline void do_timer_interrupt_hook(struct pt_regs *regs)
  * system, in that case we have to call the local interrupt handler.
  */
 #ifndef CONFIG_X86_LOCAL_APIC
-	if (!user_mode(regs))
-		x86_do_profile(regs->eip);
+	x86_do_profile(regs);
 #else
 	if (!using_apic_timer)
 		smp_local_timer_interrupt(regs);
diff --git a/arch/i386/mach-visws/do_timer.h b/arch/i386/mach-visws/do_timer.h
index b2c1cbed5cb9..d19c7063e17d 100644
--- a/arch/i386/mach-visws/do_timer.h
+++ b/arch/i386/mach-visws/do_timer.h
@@ -15,8 +15,7 @@ static inline void do_timer_interrupt_hook(struct pt_regs *regs)
  * system, in that case we have to call the local interrupt handler.
  */
 #ifndef CONFIG_X86_LOCAL_APIC
-	if (!user_mode(regs))
-		x86_do_profile(regs->eip);
+	x86_do_profile(regs);
 #else
 	if (!using_apic_timer)
 		smp_local_timer_interrupt(regs);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 7bdea5bbe922..cbafa4129498 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -38,6 +38,7 @@
 #include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #include <linux/times.h>
+#include <linux/profile.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
diff --git a/include/asm-i386/hw_irq.h b/include/asm-i386/hw_irq.h
index f23f4f75ce65..1a60daa9172e 100644
--- a/include/asm-i386/hw_irq.h
+++ b/include/asm-i386/hw_irq.h
@@ -13,6 +13,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/profile.h>
 #include <asm/atomic.h>
 #include <asm/irq.h>
 
@@ -65,20 +66,31 @@ extern char _stext, _etext;
 
 #define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
 
-extern unsigned long prof_cpu_mask;
-extern unsigned int * prof_buffer;
-extern unsigned long prof_len;
-extern unsigned long prof_shift;
-
 /*
- * x86 profiling function, SMP safe. We might want to do this in
- * assembly totally?
+ * The profiling function is SMP safe. (nothing can mess
+ * around with "current", and the profiling counters are
+ * updated with atomic operations). This is especially
+ * useful with a profiling multiplier != 1
  */
-static inline void x86_do_profile (unsigned long eip)
+static inline void x86_do_profile(struct pt_regs * regs)
 {
+	unsigned long eip;
+	extern unsigned long prof_cpu_mask;
+	extern char _stext;
+#ifdef CONFIG_PROFILING
+	extern void x86_profile_hook(struct pt_regs *);
+ 
+	x86_profile_hook(regs);
+#endif
+ 
+	if (user_mode(regs))
+		return;
+ 
 	if (!prof_buffer)
 		return;
 
+	eip = regs->eip;
+ 
 	/*
 	 * Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
 	 * (default is all CPUs.)
@@ -97,7 +109,28 @@ static inline void x86_do_profile (unsigned long eip)
 		eip = prof_len-1;
 	atomic_inc((atomic_t *)&prof_buffer[eip]);
 }
+ 
+struct notifier_block;
+ 
+#ifdef CONFIG_PROFILING
+ 
+int register_profile_notifier(struct notifier_block * nb);
+int unregister_profile_notifier(struct notifier_block * nb);
+
+#else
+/* !CONFIG_PROFILING stub: nothing is registered, always returns -ENOSYS. */
+static inline int register_profile_notifier(struct notifier_block * nb)
+{
+	return -ENOSYS;
+}
+/* !CONFIG_PROFILING stub: nothing to remove, always returns -ENOSYS. */
+static inline int unregister_profile_notifier(struct notifier_block * nb)
+{
+	return -ENOSYS;
+}
 
+#endif /* CONFIG_PROFILING */
+ 
 #ifdef CONFIG_SMP /*more of this file should probably be ifdefed SMP */
 static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
 	if (IO_APIC_IRQ(i))
diff --git a/include/linux/profile.h b/include/linux/profile.h
index 15c1e91198b0..11fbe9cec572 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -8,6 +8,17 @@
 #include <linux/init.h>
 #include <asm/errno.h>
  
+/* parse command line */
+int __init profile_setup(char * str);
+ 
+/* init basic kernel profiler */
+void __init profile_init(void);
+
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+
 enum profile_type {
 	EXIT_TASK,
 	EXIT_MMAP,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89c4ead4cf4b..764a3ebf3c24 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -492,10 +492,6 @@ extern unsigned long itimer_ticks;
 extern unsigned long itimer_next;
 extern void do_timer(struct pt_regs *);
 
-extern unsigned int * prof_buffer;
-extern unsigned long prof_len;
-extern unsigned long prof_shift;
-
 extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr));
 extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
 extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
diff --git a/init/main.c b/init/main.c
index c6023edc03f3..1850a1c3686d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -30,6 +30,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/security.h>
 #include <linux/workqueue.h>
+#include <linux/profile.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -52,7 +53,6 @@
 #error Sorry, your GCC is too old. It builds incorrect kernels.
 #endif
 
-extern char _stext, _etext;
 extern char *linux_banner;
 
 static int init(void *);
@@ -130,13 +130,6 @@ __setup("maxcpus=", maxcpus);
 static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
 char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
 
-static int __init profile_setup(char *str)
-{
-    int par;
-    if (get_option(&str,&par)) prof_shift = par;
-	return 1;
-}
-
 __setup("profile=", profile_setup);
 
 static int __init checksetup(char *line)
@@ -411,16 +404,7 @@ asmlinkage void __init start_kernel(void)
 #ifdef CONFIG_MODULES
 	init_modules();
 #endif
-	if (prof_shift) {
-		unsigned int size;
-		/* only text is profiled */
-		prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
-		prof_len >>= prof_shift;
-		
-		size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1;
-		prof_buffer = (unsigned int *) alloc_bootmem(size);
-	}
-
+	profile_init();
 	kmem_cache_init();
 	local_irq_enable();
 	calibrate_delay();
diff --git a/kernel/profile.c b/kernel/profile.c
index 7ebffe971ca8..756f142b1f35 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -9,6 +9,36 @@
 #include <linux/notifier.h>
 #include <linux/mm.h>
 
+extern char _stext, _etext;
+
+unsigned int * prof_buffer;
+unsigned long prof_len;
+unsigned long prof_shift;
+/* "profile=" boot option handler: stores the parsed integer in prof_shift. */
+int __init profile_setup(char * str)
+{
+	int par;
+	if (get_option(&str,&par))
+		prof_shift = par;
+	return 1;
+}
+
+/* Allocate prof_buffer from bootmem when prof_shift is non-zero. */
+void __init profile_init(void)
+{
+	unsigned int size;
+ 
+	if (!prof_shift) 
+		return;
+ 
+	/* only text is profiled */
+	prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
+	prof_len >>= prof_shift;	/* one counter per 2^prof_shift bytes of text */
+		
+	size = prof_len * sizeof(unsigned int) + PAGE_SIZE - 1;
+	prof_buffer = (unsigned int *) alloc_bootmem(size);
+}
+
 /* Profile event notifications */
  
 #ifdef CONFIG_PROFILING
diff --git a/kernel/timer.c b/kernel/timer.c
index bf0077634c93..2d30f7fd0ecb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -406,10 +406,6 @@ long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
 long time_reftime;			/* time at last adjustment (s)	*/
 long time_adjust;
 
-unsigned int * prof_buffer;
-unsigned long prof_len;
-unsigned long prof_shift;
-
 /*
  * this routine handles the overflow of the microsecond field
  *
-- 
cgit v1.2.3


From f35e65513f6bd0a346c8e51e78c8893bb3143c9f Mon Sep 17 00:00:00 2001
From: John Levon <levon@movementarian.org>
Date: Tue, 15 Oct 2002 04:31:08 -0700
Subject: [PATCH] oprofile - dcookies need to use u32

Make dcookies use a stable size regardless of whether we're
on a 32-bit or 64-bit platform.
---
 drivers/oprofile/buffer_sync.c | 24 ++++++++++++------------
 drivers/oprofile/oprof.c       |  1 -
 fs/dcookies.c                  | 14 +++++++-------
 include/linux/dcookies.h       |  4 ++--
 4 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 46360ee22da2..79b92c1c7965 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -118,13 +118,13 @@ void sync_stop(void)
  * because we cannot reach this code without at least one
  * dcookie user still being registered (namely, the reader
  * of the event buffer). */
-static inline unsigned long fast_get_dcookie(struct dentry * dentry,
+static inline u32 fast_get_dcookie(struct dentry * dentry,
 	struct vfsmount * vfsmnt)
 {
-	unsigned long cookie;
+	u32 cookie;
  
 	if (dentry->d_cookie)
-		return (unsigned long)dentry;
+		return (u32)dentry;
 	get_dcookie(dentry, vfsmnt, &cookie);
 	return cookie;
 }
@@ -135,9 +135,9 @@ static inline unsigned long fast_get_dcookie(struct dentry * dentry,
  * not strictly necessary but allows oprofile to associate
  * shared-library samples with particular applications
  */
-static unsigned long get_exec_dcookie(struct mm_struct * mm)
+static u32 get_exec_dcookie(struct mm_struct * mm)
 {
-	unsigned long cookie = 0;
+	u32 cookie = 0;
 	struct vm_area_struct * vma;
  
 	if (!mm)
@@ -163,9 +163,9 @@ out:
  * sure to do this lookup before a mm->mmap modification happens so
  * we don't lose track.
  */
-static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset)
+static u32 lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset)
 {
-	unsigned long cookie = 0;
+	u32 cookie = 0;
 	struct vm_area_struct * vma;
 
 	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
@@ -188,7 +188,7 @@ out:
 }
 
 
-static unsigned long last_cookie = ~0UL;
+static u32 last_cookie = ~0UL;
  
 static void add_cpu_switch(int i)
 {
@@ -199,7 +199,7 @@ static void add_cpu_switch(int i)
 }
 
  
-static void add_ctx_switch(pid_t pid, unsigned long cookie)
+static void add_ctx_switch(pid_t pid, u32 cookie)
 {
 	add_event_entry(ESCAPE_CODE);
 	add_event_entry(CTX_SWITCH_CODE); 
@@ -208,7 +208,7 @@ static void add_ctx_switch(pid_t pid, unsigned long cookie)
 }
 
  
-static void add_cookie_switch(unsigned long cookie)
+static void add_cookie_switch(u32 cookie)
 {
 	add_event_entry(ESCAPE_CODE);
 	add_event_entry(COOKIE_SWITCH_CODE);
@@ -225,7 +225,7 @@ static void add_sample_entry(unsigned long offset, unsigned long event)
 
 static void add_us_sample(struct mm_struct * mm, struct op_sample * s)
 {
-	unsigned long cookie;
+	u32 cookie;
 	off_t offset;
  
  	cookie = lookup_dcookie(mm, s->eip, &offset);
@@ -317,7 +317,7 @@ static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf)
 {
 	struct mm_struct * mm = 0;
 	struct task_struct * new;
-	unsigned long cookie;
+	u32 cookie;
 	int i;
  
 	for (i=0; i < cpu_buf->pos; ++i) {
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
index 1cae1bc13c8d..91e120f1ac75 100644
--- a/drivers/oprofile/oprof.c
+++ b/drivers/oprofile/oprof.c
@@ -13,7 +13,6 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sched.h>
-#include <linux/dcookies.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
 #include <linux/oprofile.h>
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 0236c146b451..d589103eb820 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -8,7 +8,7 @@
  * non-transitory that can be processed at a later date.
  * This is done by locking the dentry/vfsmnt pair in the
  * kernel until released by the tasks needing the persistent
- * objects. The tag is simply an unsigned long that refers
+ * objects. The tag is simply an u32 that refers
  * to the pair and can be looked up from userspace.
  */
 
@@ -46,19 +46,19 @@ static inline int is_live(void)
 
 
 /* The dentry is locked, its address will do for the cookie */
-static inline unsigned long dcookie_value(struct dcookie_struct * dcs)
+static inline u32 dcookie_value(struct dcookie_struct * dcs)
 {
-	return (unsigned long)dcs->dentry;
+	return (u32)dcs->dentry;
 }
 
 
-static size_t dcookie_hash(unsigned long dcookie)
+static size_t dcookie_hash(u32 dcookie)
 {
 	return (dcookie >> 2) & (hash_size - 1);
 }
 
 
-static struct dcookie_struct * find_dcookie(unsigned long dcookie)
+static struct dcookie_struct * find_dcookie(u32 dcookie)
 {
 	struct dcookie_struct * found = 0;
 	struct dcookie_struct * dcs;
@@ -109,7 +109,7 @@ static struct dcookie_struct * alloc_dcookie(struct dentry * dentry,
  * value for a dentry/vfsmnt pair.
  */
 int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
-	unsigned long * cookie)
+	u32 * cookie)
 {
 	int err = 0;
 	struct dcookie_struct * dcs;
@@ -142,7 +142,7 @@ out:
 /* And here is where the userspace process can look up the cookie value
  * to retrieve the path.
  */
-asmlinkage int sys_lookup_dcookie(unsigned long cookie, char * buf, size_t len)
+asmlinkage int sys_lookup_dcookie(u32 cookie, char * buf, size_t len)
 {
 	char * kbuf;
 	char * path;
diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h
index b2ae9692dc05..7c4d3319e7d0 100644
--- a/include/linux/dcookies.h
+++ b/include/linux/dcookies.h
@@ -44,7 +44,7 @@ void dcookie_unregister(struct dcookie_user * user);
  * Returns 0 on success, with *cookie filled in
  */
 int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
-	unsigned long * cookie);
+	u32 * cookie);
 
 #else
 
@@ -59,7 +59,7 @@ void dcookie_unregister(struct dcookie_user * user)
 }
  
 static inline int get_dcookie(struct dentry * dentry,
-	struct vfsmount * vfsmnt, unsigned long * cookie)
+	struct vfsmount * vfsmnt, u32 * cookie)
 {
 	return -ENOSYS;
 } 
-- 
cgit v1.2.3


From de1c3893d67450cc1f3822f84a3178ae24b9a859 Mon Sep 17 00:00:00 2001
From: Tim Wright <timw@splhi.com>
Date: Tue, 15 Oct 2002 05:11:13 -0700
Subject: [PATCH] Forward port of 2.4 fsync_buffers_list() fix.

There was a bug in fsync_buffers_list() in 2.4 (fixed in 2.4.19) that
could cause the function to return without having written the current
contents of all the buffers.

Obviously, this could be bad for anybody relying on ordering using
O_SYNC or fsync().  If an I/O was already in flight for a particular bh
at the time of the call to fsync_buffers_list(), ll_rw_block() will not
initiate a new I/O even though the contents may have changed.  It is
therefore necessary to wait before the call.  Here's a patch against
2.5.42 that applies the same fix.
---
 fs/buffer.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index d024b78c3e60..35d43421c3a8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -811,6 +811,13 @@ int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 			if (buffer_dirty(bh)) {
 				get_bh(bh);
 				spin_unlock(lock);
+				/*
+				 * Ensure any pending I/O completes so that
+				 * ll_rw_block() actually writes the current
+				 * contents - it is a noop if I/O is still in
+				 * flight on potentially older contents.
+				 */
+				wait_on_buffer(bh);
 				ll_rw_block(WRITE, 1, &bh);
 				brelse(bh);
 				spin_lock(lock);
-- 
cgit v1.2.3


From e078deaee0e32326f8aa9ac97146660530fbf8cc Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@fys.uio.no>
Date: Tue, 15 Oct 2002 05:30:37 -0700
Subject: [PATCH] A basic NFSv4 client for 2.5.x

Clean up nfs_fill_super().

Separate the parsing of the nfs_mount_data struct and the
initialization + sanity checking of the resulting super_block.
The latter is now performed in the new function nfs_sb_init().
---
 fs/nfs/inode.c | 286 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 140 insertions(+), 146 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f7e1e442c9e7..f6cfe092fb13 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -234,131 +234,28 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh)
 }
 
 /*
- * The way this works is that the mount process passes a structure
- * in the data argument which contains the server's IP address
- * and the root file handle obtained from the server's mount
- * daemon. We stash these away in the private superblock fields.
+ * Do NFS version-independent mount processing, and sanity checking
  */
-int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
+int nfs_sb_init(struct super_block *sb)
 {
 	struct nfs_server	*server;
-	struct rpc_xprt		*xprt = NULL;
-	struct rpc_clnt		*clnt = NULL;
 	struct inode		*root_inode = NULL;
-	rpc_authflavor_t	authflavor;
-	struct rpc_timeout	timeparms;
 	struct nfs_fsinfo	fsinfo;
-	int			tcp, version, maxlen;
 
 	/* We probably want something more informative here */
 	snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
 
+	server = NFS_SB(sb);
+
 	sb->s_magic      = NFS_SUPER_MAGIC;
 	sb->s_op         = &nfs_sops;
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize  = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
-	server           = NFS_SB(sb);
-	server->rsize    = nfs_block_size(data->rsize, NULL);
-	server->wsize    = nfs_block_size(data->wsize, NULL);
-	server->flags    = data->flags & NFS_MOUNT_FLAGMASK;
-
-	if (data->flags & NFS_MOUNT_NOAC) {
-		data->acregmin = data->acregmax = 0;
-		data->acdirmin = data->acdirmax = 0;
-		sb->s_flags |= MS_SYNCHRONOUS;
-	}
-	server->acregmin = data->acregmin*HZ;
-	server->acregmax = data->acregmax*HZ;
-	server->acdirmin = data->acdirmin*HZ;
-	server->acdirmax = data->acdirmax*HZ;
-
-	server->namelen  = data->namlen;
-	server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
-	if (!server->hostname)
-		goto out_unlock;
-	strcpy(server->hostname, data->hostname);
 	INIT_LIST_HEAD(&server->lru_read);
 	INIT_LIST_HEAD(&server->lru_dirty);
 	INIT_LIST_HEAD(&server->lru_commit);
 	INIT_LIST_HEAD(&server->lru_busy);
 
- nfsv3_try_again:
-	server->caps = 0;
-	/* Check NFS protocol revision and initialize RPC op vector
-	 * and file handle pool. */
-	if (data->flags & NFS_MOUNT_VER3) {
-#ifdef CONFIG_NFS_V3
-		server->rpc_ops = &nfs_v3_clientops;
-		version = 3;
-		server->caps |= NFS_CAP_READDIRPLUS;
-		if (data->version < 4) {
-			printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
-			goto out_unlock;
-		}
-#else
-		printk(KERN_NOTICE "NFS: NFSv3 not supported.\n");
-		goto out_unlock;
-#endif
-	} else {
-		server->rpc_ops = &nfs_v2_clientops;
-		version = 2;
-        }
-
-	/* Which protocol do we use? */
-	tcp   = (data->flags & NFS_MOUNT_TCP);
-
-	/* Initialize timeout values */
-	timeparms.to_initval = data->timeo * HZ / 10;
-	timeparms.to_retries = data->retrans;
-	timeparms.to_maxval  = tcp? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT;
-	timeparms.to_exponential = 1;
-
-	if (!timeparms.to_initval)
-		timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10;
-	if (!timeparms.to_retries)
-		timeparms.to_retries = 5;
-
-	/* Now create transport and client */
-	xprt = xprt_create_proto(tcp? IPPROTO_TCP : IPPROTO_UDP,
-						&server->addr, &timeparms);
-	if (xprt == NULL)
-		goto out_no_xprt;
-
-	/* Choose authentication flavor */
-	authflavor = RPC_AUTH_UNIX;
-	if (data->flags & NFS_MOUNT_SECURE)
-		authflavor = RPC_AUTH_DES;
-	else if (data->flags & NFS_MOUNT_KERBEROS)
-		authflavor = RPC_AUTH_KRB;
-
-	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				 version, authflavor);
-	if (clnt == NULL)
-		goto out_no_client;
-
-	clnt->cl_intr     = (data->flags & NFS_MOUNT_INTR)? 1 : 0;
-	clnt->cl_softrtry = (data->flags & NFS_MOUNT_SOFT)? 1 : 0;
-	clnt->cl_droppriv = (data->flags & NFS_MOUNT_BROKEN_SUID) ? 1 : 0;
-	clnt->cl_chatty   = 1;
-	server->client    = clnt;
-
-	/* Fire up rpciod if not yet running */
-	if (rpciod_up() != 0)
-		goto out_no_iod;
-
-	/*
-	 * Keep the super block locked while we try to get 
-	 * the root fh attributes.
-	 */
 	/* Did getting the root inode fail? */
-	if (!(root_inode = nfs_get_root(sb, &server->fh))
-	    && (data->flags & NFS_MOUNT_VER3)) {
-		data->flags &= ~NFS_MOUNT_VER3;
-		rpciod_down();
-		rpc_shutdown_client(server->client);
-		goto nfsv3_try_again;
-	}
-
+	root_inode = nfs_get_root(sb, &server->fh);
 	if (!root_inode)
 		goto out_no_root;
 	sb->s_root = d_alloc_root(root_inode);
@@ -377,9 +274,9 @@ int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int sile
         }
 
 	/* Work out a lot of parameters */
-	if (data->rsize == 0)
+	if (server->rsize == 0)
 		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
-	if (data->wsize == 0)
+	if (server->wsize == 0)
 		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
 	/* NFSv3: we don't have bsize, but rather rtmult and wtmult... */
 	if (!fsinfo.bsize)
@@ -391,12 +288,12 @@ int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int sile
 	if (fsinfo.bsize < server->wsize)
 		fsinfo.bsize = server->wsize;
 
-	if (data->bsize == 0)
+	if (sb->s_blocksize == 0)
 		sb->s_blocksize = nfs_block_bits(fsinfo.bsize, &sb->s_blocksize_bits);
-	if (server->rsize > fsinfo.rtmax)
-		server->rsize = fsinfo.rtmax;
-	if (server->wsize > fsinfo.wtmax)
-		server->wsize = fsinfo.wtmax;
+	if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
+		server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
+	if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
+		server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
 
 	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (server->rpages > NFS_READ_MAXIOV) {
@@ -416,10 +313,11 @@ int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int sile
 	if (server->dtsize > server->rsize)
 		server->dtsize = server->rsize;
 
-        maxlen = (version == 2) ? NFS2_MAXNAMLEN : NFS3_MAXNAMLEN;
-
-        if (server->namelen == 0 || server->namelen > maxlen)
-                server->namelen = maxlen;
+	if (server->flags & NFS_MOUNT_NOAC) {
+		server->acregmin = server->acregmax = 0;
+		server->acdirmin = server->acdirmax = 0;
+		sb->s_flags |= MS_SYNCHRONOUS;
+	}
 
 	sb->s_maxbytes = fsinfo.maxfilesize;
 	if (sb->s_maxbytes > MAX_LFS_FILESIZE) 
@@ -432,44 +330,140 @@ int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int sile
 	}
 
 	/* We're airborne Set socket buffersize */
-	rpc_setbufsize(clnt, server->wsize + 100, server->rsize + 100);
-
-	/* Check whether to start the lockd process */
-	if (!(server->flags & NFS_MOUNT_NONLM))
-		lockd_up();
+	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
 	return 0;
-
 	/* Yargs. It didn't work out. */
- failure_kill_reqlist:
+failure_kill_reqlist:
 	nfs_reqlist_exit(server);
+out_free_all:
+	if (root_inode)
+		iput(root_inode);
+	nfs_reqlist_free(server);
+	return -EINVAL;
 out_no_root:
 	printk("nfs_read_super: get root inode failed\n");
-	iput(root_inode);
-	rpciod_down();
-	goto out_shutdown;
+	goto out_free_all;
+}
 
-out_no_iod:
-	printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
-out_shutdown:
-	rpc_shutdown_client(server->client);
-	goto out_free_host;
+/*
+ * The way this works is that the mount process passes a structure
+ * in the data argument which contains the server's IP address
+ * and the root file handle obtained from the server's mount
+ * daemon. We stash these away in the private superblock fields.
+ */
+int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
+{
+	struct nfs_server	*server;
+	struct rpc_xprt		*xprt = NULL;
+	struct rpc_clnt		*clnt = NULL;
+	struct rpc_timeout	timeparms;
+	int			tcp, err = -EIO;
 
-out_no_client:
-	printk(KERN_WARNING "NFS: cannot create RPC client.\n");
-	xprt_destroy(xprt);
-	goto out_free_host;
+	server           = NFS_SB(sb);
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	if (data->bsize)
+		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
+	if (data->rsize)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+	server->flags    = data->flags & NFS_MOUNT_FLAGMASK;
 
-out_no_xprt:
-	printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+	server->acregmin = data->acregmin*HZ;
+	server->acregmax = data->acregmax*HZ;
+	server->acdirmin = data->acdirmin*HZ;
+	server->acdirmax = data->acdirmax*HZ;
 
-out_free_host:
-	nfs_reqlist_free(server);
-	kfree(server->hostname);
-out_unlock:
-	goto out_fail;
+	server->namelen  = data->namlen;
+	server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
+	if (!server->hostname)
+		goto out_fail;
+	strcpy(server->hostname, data->hostname);
+
+	/* Check NFS protocol revision and initialize RPC op vector
+	 * and file handle pool. */
+	if (server->flags & NFS_MOUNT_VER3) {
+#ifdef CONFIG_NFS_V3
+		server->rpc_ops = &nfs_v3_clientops;
+		server->caps |= NFS_CAP_READDIRPLUS;
+		if (data->version < 4) {
+			printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
+			goto out_fail;
+		}
+#else
+		printk(KERN_NOTICE "NFS: NFSv3 not supported.\n");
+		goto out_fail;
+#endif
+	} else {
+		server->rpc_ops = &nfs_v2_clientops;
+	}
 
+	/* Which protocol do we use? */
+	tcp   = (data->flags & NFS_MOUNT_TCP);
+
+	/* Initialize timeout values */
+	timeparms.to_initval = data->timeo * HZ / 10;
+	timeparms.to_retries = data->retrans;
+	timeparms.to_maxval  = tcp? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT;
+	timeparms.to_exponential = 1;
+
+	if (!timeparms.to_initval)
+		timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10;
+	if (!timeparms.to_retries)
+		timeparms.to_retries = 5;
+
+	/* Now create transport and client */
+	xprt = xprt_create_proto(tcp? IPPROTO_TCP : IPPROTO_UDP,
+						&server->addr, &timeparms);
+	if (xprt == NULL) {
+		printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+		goto out_fail;
+	}
+	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
+				 server->rpc_ops->version, RPC_AUTH_UNIX);
+	if (clnt == NULL) {
+		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		xprt_destroy(xprt);
+		goto out_fail;
+	}
+
+	clnt->cl_intr     = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
+	clnt->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
+	clnt->cl_droppriv = (server->flags & NFS_MOUNT_BROKEN_SUID) ? 1 : 0;
+	clnt->cl_chatty   = 1;
+	server->client    = clnt;
+
+	/* Fire up rpciod if not yet running */
+	if (rpciod_up() != 0) {
+		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
+		goto out_shutdown;
+	}
+
+	err = nfs_sb_init(sb);
+	if (err != 0)
+		goto out_noinit;
+
+	if (server->flags & NFS_MOUNT_VER3) {
+		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+			server->namelen = NFS3_MAXNAMLEN;
+	} else {
+		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+			server->namelen = NFS2_MAXNAMLEN;
+	}
+
+	/* Check whether to start the lockd process */
+	if (!(server->flags & NFS_MOUNT_NONLM))
+		lockd_up();
+	return 0;
+out_noinit:
+	rpciod_down();
+out_shutdown:
+	rpc_shutdown_client(server->client);
 out_fail:
-	return -EINVAL;
+	if (server->hostname)
+		kfree(server->hostname);
+	return err;
 }
 
 static int
-- 
cgit v1.2.3


From b354d9171b5c04720daa2c1f993a10f15e372286 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@fys.uio.no>
Date: Tue, 15 Oct 2002 05:30:42 -0700
Subject: [PATCH] A basic NFSv4 client for 2.5.x

Further cleanups

Separate the static and dynamic filesystem data retrieval calls as per the
NFSv3 spec. This also simplifies things for NFSv4, since many of the
attributes in the fsinfo+fstat combined call are not mandatory to
implement.
---
 fs/nfs/inode.c          | 55 ++++++++++++++++++++++++++-----------------------
 fs/nfs/nfs2xdr.c        | 29 +++++---------------------
 fs/nfs/nfs3proc.c       | 40 ++++++++++++++++++++++++++---------
 fs/nfs/nfs3xdr.c        | 18 +++++++---------
 fs/nfs/proc.c           | 53 ++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/nfs_xdr.h | 28 +++++++++++++++++++++----
 6 files changed, 146 insertions(+), 77 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f6cfe092fb13..940173f909b8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -240,7 +240,13 @@ int nfs_sb_init(struct super_block *sb)
 {
 	struct nfs_server	*server;
 	struct inode		*root_inode = NULL;
-	struct nfs_fsinfo	fsinfo;
+	struct nfs_fattr	fattr;
+	struct nfs_fsinfo	fsinfo = {
+					.fattr = &fattr,
+				};
+	struct nfs_pathconf pathinfo = {
+			.fattr = &fattr,
+	};
 
 	/* We probably want something more informative here */
 	snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
@@ -265,31 +271,27 @@ int nfs_sb_init(struct super_block *sb)
 	sb->s_root->d_op = &nfs_dentry_operations;
 
 	/* Get some general file system info */
-        if (server->rpc_ops->statfs(server, &server->fh, &fsinfo) >= 0) {
-		if (server->namelen == 0)
-			server->namelen = fsinfo.namelen;
-	} else {
+        if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) {
 		printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n");
 		goto out_no_root;
         }
-
+	if (server->namelen == 0 &&
+	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
+		server->namelen = pathinfo.max_namelen;
 	/* Work out a lot of parameters */
 	if (server->rsize == 0)
 		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
 	if (server->wsize == 0)
 		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
-	/* NFSv3: we don't have bsize, but rather rtmult and wtmult... */
-	if (!fsinfo.bsize)
-		fsinfo.bsize = (fsinfo.rtmult>fsinfo.wtmult) ? fsinfo.rtmult : fsinfo.wtmult;
-	/* Also make sure we don't go below rsize/wsize since
-	 * RPC calls are expensive */
-	if (fsinfo.bsize < server->rsize)
-		fsinfo.bsize = server->rsize;
-	if (fsinfo.bsize < server->wsize)
-		fsinfo.bsize = server->wsize;
-
-	if (sb->s_blocksize == 0)
-		sb->s_blocksize = nfs_block_bits(fsinfo.bsize, &sb->s_blocksize_bits);
+	if (sb->s_blocksize == 0) {
+		if (fsinfo.wtmult == 0) {
+			sb->s_blocksize = 512;
+			sb->s_blocksize_bits = 9;
+		} else
+			sb->s_blocksize = nfs_block_bits(fsinfo.wtmult,
+							 &sb->s_blocksize_bits);
+	}
+
 	if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
 		server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
 	if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
@@ -472,29 +474,30 @@ nfs_statfs(struct super_block *sb, struct statfs *buf)
 	struct nfs_server *server = NFS_SB(sb);
 	unsigned char blockbits;
 	unsigned long blockres;
-	struct nfs_fsinfo res;
+	struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
+	struct nfs_fattr fattr;
+	struct nfs_fsstat res = {
+			.fattr = &fattr,
+	};
 	int error;
 
 	lock_kernel();
 
-	error = server->rpc_ops->statfs(server, NFS_FH(sb->s_root->d_inode), &res);
+	error = server->rpc_ops->statfs(server, rootfh, &res);
 	buf->f_type = NFS_SUPER_MAGIC;
 	if (error < 0)
 		goto out_err;
 
-	if (res.bsize == 0)
-		res.bsize = sb->s_blocksize;
-	buf->f_bsize = nfs_block_bits(res.bsize, &blockbits);
+	buf->f_bsize = sb->s_blocksize;
+	blockbits = sb->s_blocksize_bits;
 	blockres = (1 << blockbits) - 1;
 	buf->f_blocks = (res.tbytes + blockres) >> blockbits;
 	buf->f_bfree = (res.fbytes + blockres) >> blockbits;
 	buf->f_bavail = (res.abytes + blockres) >> blockbits;
 	buf->f_files = res.tfiles;
 	buf->f_ffree = res.afiles;
-	if (res.namelen == 0 || res.namelen > server->namelen)
-		res.namelen = server->namelen;
-	buf->f_namelen = res.namelen;
 
+	buf->f_namelen = server->namelen;
  out:
 	unlock_kernel();
 
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 8dc92b8b3a1c..8e652afdfea4 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -596,37 +596,18 @@ nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
  * Decode STATFS reply
  */
 static int
-nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_fsstat *res)
 {
 	int	status;
-	u32	xfer_size;
 
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
 
-	/* For NFSv2, we more or less have to guess the preferred
-	 * read/write/readdir sizes from the single 'transfer size'
-	 * value.
-	 */
-	xfer_size = ntohl(*p++);	/* tsize */
-	res->rtmax  = 8 * 1024;
-	res->rtpref = xfer_size;
-	res->rtmult = xfer_size;
-	res->wtmax  = 8 * 1024;
-	res->wtpref = xfer_size;
-	res->wtmult = xfer_size;
-	res->dtpref = PAGE_CACHE_SIZE;
-	res->maxfilesize = 0x7FFFFFFF;	/* just a guess */
+	res->tsize  = ntohl(*p++);
 	res->bsize  = ntohl(*p++);
-
-	res->tbytes = ntohl(*p++) * res->bsize;
-	res->fbytes = ntohl(*p++) * res->bsize;
-	res->abytes = ntohl(*p++) * res->bsize;
-	res->tfiles = 0;
-	res->ffiles = 0;
-	res->afiles = 0;
-	res->namelen = 0;
-
+	res->blocks = ntohl(*p++);
+	res->bfree  = ntohl(*p++);
+	res->bavail = ntohl(*p++);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 1ddb51374cba..790c27ead44f 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -639,24 +639,42 @@ nfs3_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr,
 	return status;
 }
 
-/*
- * This is a combo call of fsstat and fsinfo
- */
 static int
 nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
-		 struct nfs_fsinfo *info)
+		 struct nfs_fsstat *stat)
 {
 	int	status;
 
 	dprintk("NFS call  fsstat\n");
-	memset((char *)info, 0, sizeof(*info));
-	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, info, 0);
-	if (status < 0)
-		goto error;
+	stat->fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+	dprintk("NFS reply statfs: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsinfo *info)
+{
+	int	status;
+
+	dprintk("NFS call  fsinfo\n");
+	info->fattr->valid = 0;
 	status = rpc_call(server->client, NFS3PROC_FSINFO, fhandle, info, 0);
+	dprintk("NFS reply fsinfo: %d\n", status);
+	return status;
+}
 
-error:
-	dprintk("NFS reply statfs: %d\n", status);
+static int
+nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_pathconf *info)
+{
+	int	status;
+
+	dprintk("NFS call  pathconf\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+	dprintk("NFS reply pathconf: %d\n", status);
 	return status;
 }
 
@@ -824,6 +842,8 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.readdir	= nfs3_proc_readdir,
 	.mknod		= nfs3_proc_mknod,
 	.statfs		= nfs3_proc_statfs,
+	.fsinfo		= nfs3_proc_fsinfo,
+	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
 	.read_setup	= nfs3_proc_read_setup,
 	.write_setup	= nfs3_proc_write_setup,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b0c77b19fff9..2a813fb65365 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -912,14 +912,13 @@ nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res)
  * Decode FSSTAT reply
  */
 static int
-nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res)
 {
-	struct nfs_fattr dummy;
 	int		status;
 
 	status = ntohl(*p++);
 
-	p = xdr_decode_post_op_attr(p, &dummy);
+	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
 		return -nfs_stat_to_errno(status);
 
@@ -940,12 +939,11 @@ nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
 static int
 nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
 {
-	struct nfs_fattr dummy;
 	int		status;
 
 	status = ntohl(*p++);
 
-	p = xdr_decode_post_op_attr(p, &dummy);
+	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
 		return -nfs_stat_to_errno(status);
 
@@ -959,6 +957,7 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
 	p = xdr_decode_hyper(p, &res->maxfilesize);
 
 	/* ignore time_delta and properties */
+	res->lease_time = 0;
 	return 0;
 }
 
@@ -966,18 +965,17 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
  * Decode PATHCONF reply
  */
 static int
-nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res)
 {
-	struct nfs_fattr dummy;
 	int		status;
 
 	status = ntohl(*p++);
 
-	p = xdr_decode_post_op_attr(p, &dummy);
+	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
 		return -nfs_stat_to_errno(status);
-	res->linkmax = ntohl(*p++);
-	res->namelen = ntohl(*p++);
+	res->max_link = ntohl(*p++);
+	res->max_namelen = ntohl(*p++);
 
 	/* ignore remaining fields */
 	return 0;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 2ad13ec4cd27..a5a1c373444d 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -460,17 +460,62 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 
 static int
 nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
-			struct nfs_fsinfo *info)
+			struct nfs_fsstat *stat)
 {
+	struct nfs2_fsstat fsinfo;
 	int	status;
 
 	dprintk("NFS call  statfs\n");
-	memset((char *)info, 0, sizeof(*info));
-	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, info, 0);
+	stat->fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
 	dprintk("NFS reply statfs: %d\n", status);
+	if (status)
+		goto out;
+	stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize;
+	stat->fbytes = (u64)fsinfo.bfree  * fsinfo.bsize;
+	stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize;
+	stat->tfiles = 0;
+	stat->ffiles = 0;
+	stat->afiles = 0;
+out:
+	return status;
+}
+
+static int
+nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+			struct nfs_fsinfo *info)
+{
+	struct nfs2_fsstat fsinfo;	/* raw NFSv2 STATFS reply */
+	int	status;
+
+	dprintk("NFS call  fsinfo\n");
+	info->fattr->valid = 0;		/* the STATFS decoder fills in no attributes */
+	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);	/* decode into the local reply, not over the 'info' pointer */
+	dprintk("NFS reply fsinfo: %d\n", status);
+	if (status)
+		goto out;
+	info->rtmax  = NFS_MAXDATA;
+	info->rtpref = fsinfo.tsize;	/* v2 reports a single transfer size */
+	info->rtmult = fsinfo.bsize;
+	info->wtmax  = NFS_MAXDATA;
+	info->wtpref = fsinfo.tsize;
+	info->wtmult = fsinfo.bsize;
+	info->dtpref = fsinfo.tsize;
+	info->maxfilesize = 0x7FFFFFFF;
+	info->lease_time = 0;
+out:
 	return status;
 }
 
+static int
+nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		  struct nfs_pathconf *info)
+{
+	info->max_link = 0;	/* fixed value; no RPC is issued here */
+	info->max_namelen = NFS2_MAXNAMLEN;	/* fixed NFSv2 name-length constant */
+	return 0;	/* always succeeds; server, fhandle and info->fattr are not touched */
+}
+
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
 static void
@@ -590,6 +635,8 @@ struct nfs_rpc_ops	nfs_v2_clientops = {
 	.readdir	= nfs_proc_readdir,
 	.mknod		= nfs_proc_mknod,
 	.statfs		= nfs_proc_statfs,
+	.fsinfo		= nfs_proc_fsinfo,
+	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs_decode_dirent,
 	.read_setup	= nfs_proc_read_setup,
 	.write_setup	= nfs_proc_write_setup,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b71b1b217c70..e542fe6982c5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -50,6 +50,7 @@ struct nfs_fattr {
  * Info on the file system
  */
 struct nfs_fsinfo {
+	struct nfs_fattr	*fattr; /* Post-op attributes */
 	__u32			rtmax;	/* max.  read transfer size */
 	__u32			rtpref;	/* pref. read transfer size */
 	__u32			rtmult;	/* reads should be multiple of this */
@@ -58,16 +59,31 @@ struct nfs_fsinfo {
 	__u32			wtmult;	/* writes should be multiple of this */
 	__u32			dtpref;	/* pref. readdir transfer size */
 	__u64			maxfilesize;
-	__u64			bsize;	/* block size */
+	__u32			lease_time; /* in seconds */
+};
+
+struct nfs_fsstat {
+	struct nfs_fattr	*fattr; /* Post-op attributes */
 	__u64			tbytes;	/* total size in bytes */
 	__u64			fbytes;	/* # of free bytes */
 	__u64			abytes;	/* # of bytes available to user */
 	__u64			tfiles;	/* # of files */
 	__u64			ffiles;	/* # of free files */
 	__u64			afiles;	/* # of files available to user */
-	__u32			linkmax;/* max # of hard links */
-	__u32			namelen;/* max name length */
-	__u32			lease_time; /* in seconds */
+};
+
+struct nfs2_fsstat {
+	__u32			tsize;  /* Server transfer size */
+	__u32			bsize;  /* Filesystem block size */
+	__u32			blocks; /* No. of "bsize" blocks on filesystem */
+	__u32			bfree;  /* No. of free "bsize" blocks */
+	__u32			bavail; /* No. of available "bsize" blocks */
+};
+
+struct nfs_pathconf {
+	struct nfs_fattr	*fattr; /* Post-op attributes */
+	__u32			max_link; /* max # of hard links */
+	__u32			max_namelen; /* max name length */
 };
 
 /*
@@ -391,7 +407,11 @@ struct nfs_rpc_ops {
 	int	(*mknod)   (struct inode *, struct qstr *, struct iattr *,
 			    dev_t, struct nfs_fh *, struct nfs_fattr *);
 	int	(*statfs)  (struct nfs_server *, struct nfs_fh *,
+			    struct nfs_fsstat *);
+	int	(*fsinfo)  (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
+	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
+			     struct nfs_pathconf *);
 	u32 *	(*decode_dirent)(u32 *, struct nfs_entry *, int plus);
 	void	(*read_setup)   (struct nfs_read_data *, unsigned int count);
 	void	(*write_setup)  (struct nfs_write_data *, unsigned int count, int how);
-- 
cgit v1.2.3


From ee17e0d649279f5cc07ed24506b0fcea53dbe899 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@fys.uio.no>
Date: Tue, 15 Oct 2002 05:30:48 -0700
Subject: [PATCH] A basic NFSv4 client for 2.5.x

Define the new NFSv4 data structure for passing user information
from the 'mount' program in nfs4_mount.h.

If CONFIG_NFS_V4 is defined
        Add code to parse the mount structure into the superblock.
        Declare the NFSv4 filesystem to the VFS.
---
 fs/nfs/inode.c             | 238 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs.h        |   2 +-
 include/linux/nfs3.h       |   5 +
 include/linux/nfs4_mount.h |  70 +++++++++++++
 include/linux/nfs_fs.h     |  30 ++++++
 include/linux/nfs_fs_sb.h  |  10 ++
 include/linux/nfs_mount.h  |   8 +-
 7 files changed, 361 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/nfs4_mount.h

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 940173f909b8..685d9eb076cb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -28,6 +28,7 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
 #include <linux/nfs_flushd.h>
 #include <linux/lockd/bind.h>
 #include <linux/smp_lock.h>
@@ -157,6 +158,7 @@ nfs_put_super(struct super_block *sb)
 		lockd_down();	/* release rpc.lockd */
 	rpciod_down();		/* release rpciod */
 
+	destroy_nfsv4_state(server);
 	kfree(server->hostname);
 }
 
@@ -1283,6 +1285,239 @@ static struct file_system_type nfs_fs_type = {
 	.fs_flags	= FS_ODD_RENAME,
 };
 
+#ifdef CONFIG_NFS_V4
+
+static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
+{
+	struct nfs_server *server;
+	struct rpc_xprt *xprt = NULL;
+	struct rpc_clnt *clnt = NULL;
+	struct rpc_timeout timeparms;
+	rpc_authflavor_t authflavour;
+	int proto, err = -EIO;	/* -EIO unless a more specific error is set below */
+
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;	/* 0 lets nfs_sb_init() choose the block size */
+	server = NFS_SB(sb);
+	if (data->rsize != 0)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize != 0)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+
+	/* NFSv4 doesn't use NLM locking */
+	server->flags |= NFS_MOUNT_NONLM;
+
+	server->acregmin = data->acregmin*HZ;	/* mount data is in seconds */
+	server->acregmax = data->acregmax*HZ;
+	server->acdirmin = data->acdirmin*HZ;
+	server->acdirmax = data->acdirmax*HZ;
+
+	server->rpc_ops = &nfs_v4_clientops;
+	/* Initialize timeout values */
+
+	timeparms.to_initval = data->timeo * HZ / 10;	/* timeo is in tenths of a second */
+	timeparms.to_retries = data->retrans;
+	timeparms.to_exponential = 1;
+	if (!timeparms.to_retries)
+		timeparms.to_retries = 5;
+
+	proto = data->proto;
+	/* Which IP protocol do we use? */
+	switch (proto) {
+	case IPPROTO_TCP:
+		timeparms.to_maxval  = RPC_MAX_TCP_TIMEOUT;
+		if (!timeparms.to_initval)
+			timeparms.to_initval = 600 * HZ / 10;
+		break;
+	case IPPROTO_UDP:
+		timeparms.to_maxval  = RPC_MAX_UDP_TIMEOUT;
+		if (!timeparms.to_initval)
+			timeparms.to_initval = 11 * HZ / 10;
+		break;
+	default:
+		return -EINVAL;	/* nothing acquired yet, plain return is safe */
+	}
+
+	authflavour = RPC_AUTH_UNIX;	/* default when userspace names no flavour */
+	if (data->auth_flavourlen != 0) {
+		if (data->auth_flavourlen > 1)
+			printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n");
+		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {	/* done before the transport exists, so failing here leaks nothing */
+			err = -EFAULT;
+			goto out_fail;
+		}
+	}
+
+	/* Now create transport and client */
+	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
+	if (xprt == NULL) {
+		printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+		goto out_fail;
+	}
+	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
+				 server->rpc_ops->version, authflavour);
+	if (clnt == NULL) {
+		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		xprt_destroy(xprt);	/* no client took the transport over */
+		goto out_fail;
+	}
+
+	clnt->cl_intr     = (server->flags & NFS4_MOUNT_INTR) ? 1 : 0;
+	clnt->cl_softrtry = (server->flags & NFS4_MOUNT_SOFT) ? 1 : 0;
+	clnt->cl_chatty   = 1;
+	server->client    = clnt;
+
+	/* Fire up rpciod if not yet running */
+	if (rpciod_up() != 0) {
+		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
+		goto out_shutdown;
+	}
+
+	if (create_nfsv4_state(server, data))
+		goto out_rpciod;	/* rpciod_up() succeeded, so it must be balanced */
+	err = nfs_sb_init(sb);
+	if (err == 0)
+		return 0;
+	destroy_nfsv4_state(server);
+out_rpciod:
+	rpciod_down();
+out_shutdown:
+	rpc_shutdown_client(server->client);
+out_fail:
+	return err;
+}
+
+static int nfs4_compare_super(struct super_block *sb, void *data)	/* comparison callback handed to sget() by nfs4_get_sb() */
+{
+	struct nfs_server *server = data;	/* candidate server description */
+	struct nfs_server *old = NFS_SB(sb);	/* server attached to the existing superblock */
+
+	if (strcmp(server->hostname, old->hostname) != 0)
+		return 0;	/* different host name: no match */
+	if (strcmp(server->mnt_path, old->mnt_path) != 0)
+		return 0;	/* different mount path: no match */
+	return 1;	/* same host name and same mount path */
+}
+
+static void *
+nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)	/* copy a counted user-space string into a NUL-terminated kernel buffer */
+{
+	void *p = NULL;	/* non-NULL only when the buffer was allocated here */
+
+	if (!src->len)
+		return ERR_PTR(-EINVAL);	/* empty strings are rejected */
+	if (src->len < maxlen)
+		maxlen = src->len;	/* from here on maxlen is the number of bytes copied */
+	if (dst == NULL) {
+		p = dst = kmalloc(maxlen + 1, GFP_KERNEL);	/* one extra byte for the terminator */
+		if (p == NULL)
+			return ERR_PTR(-ENOMEM);
+	}
+	if (copy_from_user(dst, src->data, maxlen)) {
+		if (p != NULL)
+			kfree(p);	/* a caller-supplied dst is never freed here */
+		return ERR_PTR(-EFAULT);
+	}
+	dst[maxlen] = '\0';	/* NOTE: a caller-supplied dst must hold maxlen + 1 bytes */
+	return dst;	/* caller-supplied dst, or the new allocation which the caller must kfree() */
+}
+
+static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
+	int flags, char *dev_name, void *raw_data)
+{
+	int error;
+	struct nfs_server *server;
+	struct super_block *s;
+	struct nfs4_mount_data *data = raw_data;	/* mount data passed in by the mount program */
+	void *p;
+
+	if (!data) {
+		printk("nfs_read_super: missing data argument\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+	memset(server, 0, sizeof(struct nfs_server));	/* out_free relies on NULL string pointers */
+
+	if (data->version != NFS4_MOUNT_VERSION) {
+		printk("nfs warning: mount version %s than kernel\n",
+			data->version < NFS4_MOUNT_VERSION ? "older" : "newer");	/* compare against the NFSv4 mount version */
+	}
+
+	p = nfs_copy_user_string(NULL, &data->hostname, 256);
+	if (IS_ERR(p))
+		goto out_err;
+	server->hostname = p;
+
+	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
+	if (IS_ERR(p))
+		goto out_err;
+	server->mnt_path = p;
+
+	p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
+			sizeof(server->ip_addr) - 1);	/* helper writes dst[maxlen], keep the NUL inside ip_addr */
+	if (IS_ERR(p))
+		goto out_err;
+
+	/* We now require that the mount process passes the remote address */
+	if (data->host_addrlen != sizeof(server->addr)) {
+		s = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
+		s = ERR_PTR(-EFAULT);
+		goto out_free;
+	}
+	if (server->addr.sin_family != AF_INET ||
+	    server->addr.sin_addr.s_addr == INADDR_ANY) {
+		printk("NFS: mount program didn't pass remote IP address!\n");
+		s = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+
+	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
+
+	if (IS_ERR(s) || s->s_root)
+		goto out_free;	/* error, or an already set-up superblock: drop our copy */
+
+	s->s_flags = flags;
+
+	error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+	if (error) {
+		up_write(&s->s_umount);
+		deactivate_super(s);
+		return ERR_PTR(error);
+	}
+	s->s_flags |= MS_ACTIVE;
+	return s;
+out_err:
+	s = (struct super_block *)p;	/* p holds the ERR_PTR from the failed copy */
+out_free:
+	if (server->mnt_path)
+		kfree(server->mnt_path);
+	if (server->hostname)
+		kfree(server->hostname);
+	kfree(server);
+	return s;
+}
+
+static struct file_system_type nfs4_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "nfs4",
+	.get_sb		= nfs4_get_sb,
+	.kill_sb	= nfs_kill_super,
+	.fs_flags	= FS_ODD_RENAME,
+};
+#define register_nfs4fs() register_filesystem(&nfs4_fs_type)
+#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type)
+#else
+#define register_nfs4fs() (0)
+#define unregister_nfs4fs()
+#endif
+
 extern int nfs_init_nfspagecache(void);
 extern void nfs_destroy_nfspagecache(void);
 extern int nfs_init_readpagecache(void);
@@ -1374,6 +1609,8 @@ static int __init init_nfs_fs(void)
         err = register_filesystem(&nfs_fs_type);
 	if (err)
 		goto out;
+	if ((err = register_nfs4fs()) != 0)
+		goto out;
 	return 0;
 out:
 	rpc_proc_unregister("nfs");
@@ -1398,6 +1635,7 @@ static void __exit exit_nfs_fs(void)
 	rpc_proc_unregister("nfs");
 #endif
 	unregister_filesystem(&nfs_fs_type);
+	unregister_nfs4fs();
 }
 
 /* Not quite true; I just maintain it */
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index 181e8decebfc..d99650a19b55 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -120,7 +120,7 @@ enum nfs_ftype {
 /*
  * This is the kernel NFS client file handle representation
  */
-#define NFS_MAXFHSIZE		64
+#define NFS_MAXFHSIZE		128
 struct nfs_fh {
 	unsigned short		size;
 	unsigned char		data[NFS_MAXFHSIZE];
diff --git a/include/linux/nfs3.h b/include/linux/nfs3.h
index 359c73e00841..7f11fa589207 100644
--- a/include/linux/nfs3.h
+++ b/include/linux/nfs3.h
@@ -59,6 +59,11 @@ enum nfs3_ftype {
 	NF3BAD  = 8
 };
 
+struct nfs3_fh {
+	unsigned short size;
+	unsigned char  data[NFS3_FHSIZE];
+};
+
 #define NFS3_VERSION		3
 #define NFS3PROC_NULL		0
 #define NFS3PROC_GETATTR	1
diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h
new file mode 100644
index 000000000000..9a782c2bbdd3
--- /dev/null
+++ b/include/linux/nfs4_mount.h
@@ -0,0 +1,70 @@
+#ifndef _LINUX_NFS4_MOUNT_H
+#define _LINUX_NFS4_MOUNT_H
+
+/*
+ *  linux/include/linux/nfs4_mount.h
+ *
+ *  Copyright (C) 2002  Trond Myklebust
+ *
+ *  structure passed from user-space to kernel-space during an nfsv4 mount
+ */
+
+/*
+ * WARNING!  Do not delete or change the order of these fields.  If
+ * a new field is required then add it to the end.  The version field
+ * tracks which fields are present.  This will ensure some measure of
+ * mount-to-kernel version compatibility.  Some of these aren't used yet
+ * but here they are anyway.
+ */
+#define NFS4_MOUNT_VERSION	1
+
+struct nfs_string {
+	unsigned int len;
+	const char* data;
+};
+
+struct nfs4_mount_data {
+	int version;				/* 1 */
+	int flags;				/* 1 */
+	int rsize;				/* 1 */
+	int wsize;				/* 1 */
+	int timeo;				/* 1 */
+	int retrans;				/* 1 */
+	int acregmin;				/* 1 */
+	int acregmax;				/* 1 */
+	int acdirmin;				/* 1 */
+	int acdirmax;				/* 1 */
+
+	/* see the definition of 'struct clientaddr4' in RFC3010 */
+	struct nfs_string client_addr;		/* 1 */
+
+	/* Mount path */
+	struct nfs_string mnt_path;		/* 1 */
+
+	/* Server details */
+	struct nfs_string hostname;		/* 1 */
+	/* Server IP address */
+	unsigned int host_addrlen;		/* 1 */
+	struct sockaddr* host_addr;		/* 1 */
+
+	/* Transport protocol to use */
+	int proto;				/* 1 */
+
+	/* Pseudo-flavours to use for authentication. See RFC2623 */
+	int auth_flavourlen;			/* 1 */
+	int *auth_flavours;			/* 1 */
+};
+
+/* bits in the flags field */
+/* Note: the fields that correspond to existing NFSv2/v3 mount options
+ * 	 should mirror the values from include/linux/nfs_mount.h
+ */
+
+#define NFS4_MOUNT_SOFT		0x0001	/* 1 */
+#define NFS4_MOUNT_INTR		0x0002	/* 1 */
+#define NFS4_MOUNT_NOCTO	0x0010	/* 1 */
+#define NFS4_MOUNT_NOAC		0x0020	/* 1 */
+#define NFS4_MOUNT_STRICTLOCK	0x1000	/* 1 */
+#define NFS4_MOUNT_FLAGMASK	0xFFFF
+
+#endif
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4c35f7cbb97c..76ab4ecc3ea8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -472,6 +472,36 @@ extern void * nfs_root_data(void);
 
 #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 
+#ifdef CONFIG_NFS_V4
+
+extern struct nfs4_client *nfs4_get_client(void);
+extern void nfs4_put_client(struct nfs4_client *clp);
+
+struct nfs4_mount_data;
+static inline int
+create_nfsv4_state(struct nfs_server *server, struct nfs4_mount_data *data)
+{
+	server->nfs4_state = NULL;
+	return 0;
+}
+
+static inline void
+destroy_nfsv4_state(struct nfs_server *server)
+{
+	if (server->mnt_path) {
+		kfree(server->mnt_path);
+		server->mnt_path = NULL;
+	}
+	if (server->nfs4_state) {
+		nfs4_put_client(server->nfs4_state);
+		server->nfs4_state = NULL;
+	}
+}
+#else
+#define create_nfsv4_state(server, data)  0
+#define destroy_nfsv4_state(server)       do { } while (0)
+#endif
+
 #endif /* __KERNEL__ */
 
 /*
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 080c98fed1dd..d171608d7105 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -30,6 +30,16 @@ struct nfs_server {
 				lru_busy;
 	struct nfs_fh		fh;
 	struct sockaddr_in	addr;
+#ifdef CONFIG_NFS_V4
+	/* Our own IP address, as a null-terminated string.
+	 * This is used to generate the clientid, and the callback address.
+	 */
+	char			ip_addr[16];
+	char *			mnt_path;
+	struct nfs4_client *	nfs4_state;	/* all NFSv4 state starts here */
+	unsigned long		lease_time;	/* in jiffies */
+	unsigned long		last_renewal;	/* in jiffies */
+#endif
 };
 
 /* Server capabilities */
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index 2b552936eeca..223ed3462064 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -10,6 +10,8 @@
  */
 #include <linux/in.h>
 #include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
 
 /*
  * WARNING!  Do not delete or change the order of these fields.  If
@@ -37,7 +39,7 @@ struct nfs_mount_data {
 	char		hostname[256];		/* 1 */
 	int		namlen;			/* 2 */
 	unsigned int	bsize;			/* 3 */
-	struct nfs_fh	root;			/* 4 */
+	struct nfs3_fh	root;			/* 4 */
 };
 
 /* bits in the flags field */
@@ -53,6 +55,10 @@ struct nfs_mount_data {
 #define NFS_MOUNT_KERBEROS	0x0100	/* 3 */
 #define NFS_MOUNT_NONLM		0x0200	/* 3 */
 #define NFS_MOUNT_BROKEN_SUID	0x0400	/* 4 */
+#if 0
+#define NFS_MOUNT_STRICTLOCK	0x1000	/* reserved for NFSv4 */
+#define NFS_MOUNT_SECFLAVOUR	0x2000	/* reserved */
+#endif
 #define NFS_MOUNT_FLAGMASK	0xFFFF
 
 #endif
-- 
cgit v1.2.3


From bf5344dc4c1c97bd8c837f8fa62315f13784f94d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@fys.uio.no>
Date: Tue, 15 Oct 2002 05:30:53 -0700
Subject: [PATCH] A basic NFSv4 client for 2.5.x

Now that all the hooks are in place, this large patch imports all
of the new code for the NFSv4 client.
  nfs4proc.c   - procedure vectors
  nfs4xdr.c    - XDR
  nfs4state.c  - state bookkeeping (very minimal for now)
  nfs4renewd.c - a daemon (implemented as an rpc_task) to keep
                 state from expiring on the server

Note: The RPCSEC_GSS authentication code is not yet included here.
  For the moment we make do with AUTH_UNIX aka. AUTH_SYS.

  Neither is the code to do upcalls to userland in order to do
  uid/gid <-> name mappings. Instead, stubs have been added to
  translate everything to 'nobody:nobody' == '-2:-2'
---
 fs/nfs/Makefile         |    1 +
 fs/nfs/file.c           |    6 +
 fs/nfs/inode.c          |    7 +-
 fs/nfs/nfs4proc.c       | 1577 +++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4renewd.c     |  110 +++
 fs/nfs/nfs4state.c      |   81 +++
 fs/nfs/nfs4xdr.c        | 1777 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs_fs.h  |   19 +
 include/linux/nfs_xdr.h |  234 ++++++-
 9 files changed, 3807 insertions(+), 5 deletions(-)
 create mode 100644 fs/nfs/nfs4proc.c
 create mode 100644 fs/nfs/nfs4renewd.c
 create mode 100644 fs/nfs/nfs4state.c
 create mode 100644 fs/nfs/nfs4xdr.c

(limited to 'fs')

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 836322c2be06..c098a522553b 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -8,6 +8,7 @@ nfs-y 			:= dir.o file.o flushd.o inode.o nfs2xdr.o pagelist.o \
 			   proc.o read.o symlink.o unlink.o write.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
+nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o
 nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
 nfs-objs		:= $(nfs-y)
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3443f647ed2f..f02b7c9c7f36 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -259,6 +259,12 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 	if (!inode)
 		return -EINVAL;
 
+	/* This will be in a forthcoming patch. */
+	if (NFS_PROTO(inode)->version == 4) {
+		printk(KERN_INFO "NFS: file locking over NFSv4 is not yet supported\n");
+		return -EIO;
+	}
+
 	/* No mandatory locks over NFS */
 	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
 		return -ENOLCK;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 685d9eb076cb..39027f2af310 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -77,8 +77,13 @@ static struct rpc_version *	nfs_version[] = {
 	NULL,
 	NULL,
 	&nfs_version2,
-#ifdef CONFIG_NFS_V3
+#if defined(CONFIG_NFS_V3)
 	&nfs_version3,
+#elif defined(CONFIG_NFS_V4)
+	NULL,
+#endif
+#if defined(CONFIG_NFS_V4)
+	&nfs_version4,
 #endif
 };
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
new file mode 100644
index 000000000000..8608fd9b3a30
--- /dev/null
+++ b/fs/nfs/nfs4proc.c
@@ -0,0 +1,1577 @@
+/*
+ *  fs/nfs/nfs4proc.c
+ *
+ *  Client-side procedure declarations for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/smp_lock.h>
+
+#define NFSDBG_FACILITY		NFSDBG_PROC
+/* Argument slot / opcode of the next unused op in compound 'cp'. */
+#define GET_OP(cp,name)		(&(cp)->ops[(cp)->req_nops].u.name)
+#define OPNUM(cp)		((cp)->ops[(cp)->req_nops].opnum)
+
+extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+
+static nfs4_stateid zero_stateid =
+  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+static spinlock_t renew_lock = SPIN_LOCK_UNLOCKED;
+
+static void
+nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops,
+		    struct nfs_server *server, char *tag)
+{
+	memset(cp, 0, sizeof(*cp));
+	cp->ops = ops;
+	cp->server = server;
+
+#if NFS4_DEBUG
+	cp->taglen = strlen(tag);
+	cp->tag = tag;
+#endif
+}
+
+static void
+nfs4_setup_access(struct nfs4_compound *cp, u32 req_access, u32 *resp_supported, u32 *resp_access)
+{
+	struct nfs4_access *access = GET_OP(cp, access);
+	
+	access->ac_req_access = req_access;
+	access->ac_resp_supported = resp_supported;
+	access->ac_resp_access = resp_access;
+	
+	OPNUM(cp) = OP_ACCESS;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_close(struct nfs4_compound *cp, nfs4_stateid stateid, u32 seqid)
+{
+	struct nfs4_close *close = GET_OP(cp, close);
+
+	close->cl_stateid = stateid;
+	close->cl_seqid = seqid;
+
+	OPNUM(cp) = OP_CLOSE;
+	cp->req_nops++;
+	cp->renew_index = cp->req_nops;
+}
+
+static void
+nfs4_setup_commit(struct nfs4_compound *cp, u64 start, u32 len, struct nfs_writeverf *verf)
+{
+	struct nfs4_commit *commit = GET_OP(cp, commit);
+
+	commit->co_start = start;
+	commit->co_len = len;
+	commit->co_verifier = verf;
+
+	OPNUM(cp) = OP_COMMIT;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_create_dir(struct nfs4_compound *cp, struct qstr *name,
+		      struct iattr *sattr, struct nfs4_change_info *info)
+{
+	struct nfs4_create *create = GET_OP(cp, create);
+	
+	create->cr_ftype = NF4DIR;
+	create->cr_namelen = name->len;
+	create->cr_name = name->name;
+	create->cr_attrs = sattr;
+	create->cr_cinfo = info;
+	
+	OPNUM(cp) = OP_CREATE;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_create_symlink(struct nfs4_compound *cp, struct qstr *name,
+			  struct qstr *linktext, struct iattr *sattr,
+			  struct nfs4_change_info *info)
+{
+	struct nfs4_create *create = GET_OP(cp, create);
+
+	create->cr_ftype = NF4LNK;
+	create->cr_textlen = linktext->len;
+	create->cr_text = linktext->name;
+	create->cr_namelen = name->len;
+	create->cr_name = name->name;
+	create->cr_attrs = sattr;
+	create->cr_cinfo = info;
+
+	OPNUM(cp) = OP_CREATE;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_create_special(struct nfs4_compound *cp, struct qstr *name,
+			    dev_t dev, struct iattr *sattr,
+			    struct nfs4_change_info *info)
+{
+	int mode = sattr->ia_mode;
+	struct nfs4_create *create = GET_OP(cp, create);
+
+	BUG_ON(!(sattr->ia_valid & ATTR_MODE));
+	BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
+	
+	if (S_ISFIFO(mode))
+		create->cr_ftype = NF4FIFO;
+	else if (S_ISBLK(mode)) {
+		create->cr_ftype = NF4BLK;
+		create->cr_specdata1 = MAJOR(dev);
+		create->cr_specdata2 = MINOR(dev);
+	}
+	else if (S_ISCHR(mode)) {
+		create->cr_ftype = NF4CHR;
+		create->cr_specdata1 = MAJOR(dev);
+		create->cr_specdata2 = MINOR(dev);
+	}
+	else
+		create->cr_ftype = NF4SOCK;
+	
+	create->cr_namelen = name->len;
+	create->cr_name = name->name;
+	create->cr_attrs = sattr;
+	create->cr_cinfo = info;
+
+	OPNUM(cp) = OP_CREATE;
+	cp->req_nops++;
+}
+
+/*
+ * This is our standard bitmap for GETATTR requests.
+ */
+u32 nfs4_fattr_bitmap[2] = {
+	FATTR4_WORD0_TYPE
+	| FATTR4_WORD0_CHANGE
+	| FATTR4_WORD0_SIZE
+	| FATTR4_WORD0_FSID
+	| FATTR4_WORD0_FILEID,
+	FATTR4_WORD1_MODE
+	| FATTR4_WORD1_NUMLINKS
+	| FATTR4_WORD1_OWNER
+	| FATTR4_WORD1_OWNER_GROUP
+	| FATTR4_WORD1_RAWDEV
+	| FATTR4_WORD1_SPACE_USED
+	| FATTR4_WORD1_TIME_ACCESS
+	| FATTR4_WORD1_TIME_METADATA
+	| FATTR4_WORD1_TIME_MODIFY
+};
+
+u32 nfs4_statfs_bitmap[2] = {
+	FATTR4_WORD0_FILES_AVAIL
+	| FATTR4_WORD0_FILES_FREE
+	| FATTR4_WORD0_FILES_TOTAL,
+	FATTR4_WORD1_SPACE_AVAIL
+	| FATTR4_WORD1_SPACE_FREE
+	| FATTR4_WORD1_SPACE_TOTAL
+};
+
+u32 nfs4_fsinfo_bitmap[2] = {
+	FATTR4_WORD0_MAXFILESIZE
+	| FATTR4_WORD0_MAXREAD
+        | FATTR4_WORD0_MAXWRITE
+	| FATTR4_WORD0_LEASE_TIME,
+	0
+};
+
+u32 nfs4_pathconf_bitmap[2] = {
+	FATTR4_WORD0_MAXLINK
+	| FATTR4_WORD0_MAXNAME,
+	0
+};
+
+/* mount bitmap: fattr bitmap + lease time */
+u32 nfs4_mount_bitmap[2] = {
+	FATTR4_WORD0_TYPE
+	| FATTR4_WORD0_CHANGE
+	| FATTR4_WORD0_SIZE
+	| FATTR4_WORD0_FSID
+	| FATTR4_WORD0_FILEID
+	| FATTR4_WORD0_LEASE_TIME,
+	FATTR4_WORD1_MODE
+	| FATTR4_WORD1_NUMLINKS
+	| FATTR4_WORD1_OWNER
+	| FATTR4_WORD1_OWNER_GROUP
+	| FATTR4_WORD1_RAWDEV
+	| FATTR4_WORD1_SPACE_USED
+	| FATTR4_WORD1_TIME_ACCESS
+	| FATTR4_WORD1_TIME_METADATA
+	| FATTR4_WORD1_TIME_MODIFY
+};
+
+static inline void
+__nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap,
+		     struct nfs_fattr *fattr,
+		     struct nfs_fsstat *fsstat,
+		     struct nfs_fsinfo *fsinfo,
+		     struct nfs_pathconf *pathconf,
+		     u32 *bmres)
+{
+	struct nfs4_getattr *op = GET_OP(cp, getattr);
+
+	op->gt_bmval = bitmap;
+	op->gt_bmres = bmres;
+	op->gt_attrs = fattr;
+	op->gt_fsstat = fsstat;
+	op->gt_fsinfo = fsinfo;
+	op->gt_pathconf = pathconf;
+	/* Claim the slot: tag it as GETATTR and bump the op count. */
+	OPNUM(cp) = OP_GETATTR;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_getattr(struct nfs4_compound *cp,
+		struct nfs_fattr *fattr,
+		u32 *bmres)
+{
+	__nfs4_setup_getattr(cp, nfs4_fattr_bitmap, fattr,
+			NULL, NULL, NULL, bmres);
+}
+
+static void
+nfs4_setup_getrootattr(struct nfs4_compound *cp,
+		struct nfs_fattr *fattr,
+		struct nfs_fsinfo *fsinfo,
+		u32 *bmres)
+{
+	__nfs4_setup_getattr(cp, nfs4_mount_bitmap,
+			fattr, NULL, fsinfo, NULL, bmres);
+}
+
+static void
+nfs4_setup_statfs(struct nfs4_compound *cp,
+		struct nfs_fsstat *fsstat,
+		u32 *bmres)
+{
+	__nfs4_setup_getattr(cp, nfs4_statfs_bitmap,
+			NULL, fsstat, NULL, NULL, bmres);
+}
+
+static void
+nfs4_setup_fsinfo(struct nfs4_compound *cp,
+		struct nfs_fsinfo *fsinfo,
+		u32 *bmres)
+{
+	__nfs4_setup_getattr(cp, nfs4_fsinfo_bitmap,
+			NULL, NULL, fsinfo, NULL, bmres);
+}
+
+static void
+nfs4_setup_pathconf(struct nfs4_compound *cp,
+		struct nfs_pathconf *pathconf,
+		u32 *bmres)
+{
+	__nfs4_setup_getattr(cp, nfs4_pathconf_bitmap,
+			NULL, NULL, NULL, pathconf, bmres);
+}
+
+static void
+nfs4_setup_getfh(struct nfs4_compound *cp, struct nfs_fh *fhandle)
+{
+	struct nfs4_getfh *getfh = GET_OP(cp, getfh);
+
+	getfh->gf_fhandle = fhandle;
+
+	OPNUM(cp) = OP_GETFH;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_link(struct nfs4_compound *cp, struct qstr *name,
+		struct nfs4_change_info *info)
+{
+	struct nfs4_link *link = GET_OP(cp, link);
+
+	link->ln_namelen = name->len;
+	link->ln_name = name->name;
+	link->ln_cinfo = info;
+
+	OPNUM(cp) = OP_LINK;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_lookup(struct nfs4_compound *cp, struct qstr *q)
+{
+	struct nfs4_lookup *lookup = GET_OP(cp, lookup);
+
+	lookup->lo_name = q;
+
+	OPNUM(cp) = OP_LOOKUP;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_putfh(struct nfs4_compound *cp, struct nfs_fh *fhandle)
+{
+	struct nfs4_putfh *putfh = GET_OP(cp, putfh);
+
+	putfh->pf_fhandle = fhandle;
+
+	OPNUM(cp) = OP_PUTFH;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_putrootfh(struct nfs4_compound *cp)
+{
+        OPNUM(cp) = OP_PUTROOTFH;
+        cp->req_nops++;
+}
+
+static void
+nfs4_setup_open(struct nfs4_compound *cp, int flags, struct qstr *name,
+		struct iattr *sattr, char *stateid, struct nfs4_change_info *cinfo,
+		u32 *rflags)
+{
+	struct nfs4_open *open = GET_OP(cp, open);
+
+	BUG_ON(cp->flags);
+	
+	open->op_share_access = flags & 3;
+	open->op_opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE;
+	open->op_createmode = NFS4_CREATE_UNCHECKED;
+	open->op_attrs = sattr;
+	if (flags & O_EXCL) {
+		u32 *p = (u32 *) open->op_verifier;
+		p[0] = jiffies;
+		p[1] = current->pid;
+		open->op_createmode = NFS4_CREATE_EXCLUSIVE;
+	}
+	open->op_name = name;
+	open->op_stateid = stateid;
+	open->op_cinfo = cinfo;
+	open->op_rflags = rflags;
+
+	OPNUM(cp) = OP_OPEN;
+	cp->req_nops++;
+	cp->renew_index = cp->req_nops;
+}
+
+static void
+nfs4_setup_open_confirm(struct nfs4_compound *cp, char *stateid)
+{
+	struct nfs4_open_confirm *open_confirm = GET_OP(cp, open_confirm);
+	
+	open_confirm->oc_stateid = stateid;
+
+	OPNUM(cp) = OP_OPEN_CONFIRM;
+	cp->req_nops++;
+	cp->renew_index = cp->req_nops;
+}
+
+static void
+nfs4_setup_read(struct nfs4_compound *cp, u64 offset, u32 length,
+		struct page **pages, unsigned int pgbase, u32 *eofp, u32 *bytes_read)
+{
+	struct nfs4_read *read = GET_OP(cp, read);
+
+	read->rd_offset = offset;
+	read->rd_length = length;
+	read->rd_pages = pages;
+	read->rd_pgbase = pgbase;
+	read->rd_eof = eofp;
+	read->rd_bytes_read = bytes_read;
+
+	OPNUM(cp) = OP_READ;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_readdir(struct nfs4_compound *cp, u64 cookie, u32 *verifier,
+		     struct page **pages, unsigned int bufsize, struct dentry *dentry)
+{
+	u32 *start, *p;
+	struct nfs4_readdir *readdir = GET_OP(cp, readdir);
+
+	BUG_ON(bufsize < 80);
+	readdir->rd_cookie = (cookie > 2) ? cookie : 0;
+	memcpy(readdir->rd_req_verifier, verifier, sizeof(nfs4_verifier));
+	readdir->rd_count = bufsize;
+	readdir->rd_bmval[0] = FATTR4_WORD0_FILEID;
+	readdir->rd_bmval[1] = 0;
+	readdir->rd_pages = pages;
+	readdir->rd_pgbase = 0;
+	
+	OPNUM(cp) = OP_READDIR;
+	cp->req_nops++;
+
+	if (cookie >= 2)
+		return;
+	
+	/*
+	 * NFSv4 servers do not return entries for '.' and '..'.
+	 * Therefore, we fake these entries here.  '.' is emitted for
+	 * cookie 0 and '..' for cookie 1; the entries themselves carry
+	 * cookies 1 and 2.  When talking to the server, we always send
+	 * cookie 0 instead of 1 or 2.
+	 */
+	start = p = (u32 *)kmap(*pages);
+	
+	if (cookie == 0) {
+		*p++ = xdr_one;                                  /* next */
+		*p++ = xdr_zero;                   /* cookie, first word */
+		*p++ = xdr_one;                   /* cookie, second word */
+		*p++ = xdr_one;                             /* entry len */
+		memcpy(p, ".\0\0\0", 4);                        /* entry */
+		p++;
+		*p++ = xdr_one;                         /* bitmap length */
+		*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
+		*p++ = htonl(8);              /* attribute buffer length */
+		p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode));
+	}
+	
+	*p++ = xdr_one;                                  /* next */
+	*p++ = xdr_zero;                   /* cookie, first word */
+	*p++ = xdr_two;                   /* cookie, second word */
+	*p++ = xdr_two;                             /* entry len */
+	memcpy(p, "..\0\0", 4);                         /* entry */
+	p++;
+	*p++ = xdr_one;                         /* bitmap length */
+	*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
+	*p++ = htonl(8);              /* attribute buffer length */
+	p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode));
+
+	readdir->rd_pgbase = (char *)p - (char *)start;
+	readdir->rd_count -= readdir->rd_pgbase;
+	kunmap(*pages);
+}
+
+static void
+nfs4_setup_readlink(struct nfs4_compound *cp, int count, struct page **pages)
+{
+	struct nfs4_readlink *readlink = GET_OP(cp, readlink);
+
+	readlink->rl_count = count;
+	readlink->rl_pages = pages;
+
+	OPNUM(cp) = OP_READLINK;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_remove(struct nfs4_compound *cp, struct qstr *name, struct nfs4_change_info *cinfo)
+{
+	struct nfs4_remove *remove = GET_OP(cp, remove);
+
+	remove->rm_namelen = name->len;
+	remove->rm_name = name->name;
+	remove->rm_cinfo = cinfo;
+
+	OPNUM(cp) = OP_REMOVE;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_rename(struct nfs4_compound *cp, struct qstr *old, struct qstr *new,
+		  struct nfs4_change_info *old_cinfo, struct nfs4_change_info *new_cinfo)
+{
+	struct nfs4_rename *rename = GET_OP(cp, rename);
+
+	rename->rn_oldnamelen = old->len;
+	rename->rn_oldname = old->name;
+	rename->rn_newnamelen = new->len;
+	rename->rn_newname = new->name;
+	rename->rn_src_cinfo = old_cinfo;
+	rename->rn_dst_cinfo = new_cinfo;
+
+	OPNUM(cp) = OP_RENAME;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_renew(struct nfs4_compound *cp)
+{
+	OPNUM(cp) = OP_RENEW;
+	cp->req_nops++;
+	cp->renew_index = cp->req_nops;
+}
+
+static void
+nfs4_setup_restorefh(struct nfs4_compound *cp)
+{
+        OPNUM(cp) = OP_RESTOREFH;
+        cp->req_nops++;
+}
+
+static void
+nfs4_setup_savefh(struct nfs4_compound *cp)
+{
+        OPNUM(cp) = OP_SAVEFH;
+        cp->req_nops++;
+}
+
+static void
+nfs4_setup_setattr(struct nfs4_compound *cp, char *stateid, struct iattr *iap)
+{
+	struct nfs4_setattr *setattr = GET_OP(cp, setattr);
+
+	setattr->st_stateid = stateid;
+	setattr->st_iap = iap;
+	
+	OPNUM(cp) = OP_SETATTR;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_setclientid(struct nfs4_compound *cp, u32 program, unsigned short port)
+{
+	struct nfs4_setclientid *setclientid = GET_OP(cp, setclientid);
+	struct nfs_server *server = cp->server;
+	struct timeval tv;
+	u32 *p;
+
+	do_gettimeofday(&tv);
+	p = (u32 *)setclientid->sc_verifier;
+	*p++ = tv.tv_sec;
+	*p++ = tv.tv_usec;
+	setclientid->sc_name = server->ip_addr;
+	sprintf(setclientid->sc_netid, "udp");
+	sprintf(setclientid->sc_uaddr, "%s.%d.%d", server->ip_addr, port >> 8, port & 255);
+	setclientid->sc_prog = program;
+	setclientid->sc_cb_ident = 0;
+	
+	OPNUM(cp) = OP_SETCLIENTID;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_setclientid_confirm(struct nfs4_compound *cp)
+{
+	OPNUM(cp) = OP_SETCLIENTID_CONFIRM;
+	cp->req_nops++;
+	cp->renew_index = cp->req_nops;
+}
+
+static void
+nfs4_setup_write(struct nfs4_compound *cp, u64 offset, u32 length, int stable,
+		 struct page **pages, unsigned int pgbase, u32 *bytes_written,
+		 struct nfs_writeverf *verf)
+{
+	struct nfs4_write *write = GET_OP(cp, write);
+
+	write->wr_offset = offset;
+	write->wr_stable_how = stable;
+	write->wr_len = length;
+	write->wr_bytes_written = bytes_written;
+	write->wr_verf = verf;
+
+	write->wr_pages = pages;
+	write->wr_pgbase = pgbase;
+
+	OPNUM(cp) = OP_WRITE;
+	cp->req_nops++;
+}
+
+static inline void
+process_lease(struct nfs4_compound *cp)
+{
+	struct nfs_server *server;
+
+	/*
+	 * Generic lease processing: If this compound contains a lease-renewing
+	 * operation, and it succeeded, update the RENEW time in the superblock.
+	 * Instead of the current time, we use the time when the request was
+	 * sent out.  (All we know is that the lease was renewed sometime
+	 * between then and now, and we have to assume the worst case.)
+	 *
+	 * Notes:
+	 *   (1) renewd doesn't acquire the spinlock when messing with
+	 *     server->last_renewal; this is OK since rpciod always runs
+	 *     under the BKL.
+	 *   (2) cp->timestamp was set at the end of XDR encode.
+	 *   (3) time_before() keeps the comparison safe across jiffies wrap.
+	 */
+	if (!cp->renew_index)
+		return;
+	if (!cp->toplevel_status || cp->resp_nops > cp->renew_index) {
+		server = cp->server;
+		spin_lock(&renew_lock);
+		if (time_before(server->last_renewal, cp->timestamp))
+			server->last_renewal = cp->timestamp;
+		spin_unlock(&renew_lock);
+	}
+}
+
+static int
+nfs4_call_compound(struct nfs4_compound *cp, struct rpc_cred *cred, int flags)
+{
+	int status;
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = cred,
+	};
+
+	status = rpc_call_sync(cp->server->client, &msg, flags);
+	if (!status)
+		process_lease(cp);
+	
+	return status;
+}
+
+static inline void
+process_cinfo(struct nfs4_change_info *info, struct nfs_fattr *fattr)
+{
+	BUG_ON((fattr->valid & NFS_ATTR_FATTR) == 0);
+	BUG_ON((fattr->valid & NFS_ATTR_FATTR_V4) == 0);
+	
+	if (fattr->change_attr == info->after) {
+		fattr->pre_change_attr = info->before;
+		fattr->valid |= NFS_ATTR_PRE_CHANGE;
+		fattr->timestamp = jiffies;
+	}
+}
+
+static int
+do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr,
+	struct nfs_fattr *fattr, struct nfs_fh *fhandle, u32 *seqid, char *stateid)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];	/* seven ops are queued below */
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2];
+	u32			bmres[2];
+	u32			rflags;
+	int			status;
+	/* OPEN by name under dir; fetch file attrs+fh, then the dir's attrs. */
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "open");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_open(&compound, flags, name, sattr, stateid, &dir_cinfo, &rflags);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	if ((status = nfs4_call_compound(&compound, NULL, 0)))
+		return status;
+	/* Fold the OPEN's change info into dir_attr, then refresh the dir. */
+	process_cinfo(&dir_cinfo, &dir_attr);
+	nfs_refresh_inode(dir, &dir_attr);
+	if (!(rflags & NFS4_OPEN_RESULT_CONFIRM)) {
+		*seqid = 1;
+		return 0;
+	}
+	*seqid = 2;
+	/* Server asked for confirmation: OPEN_CONFIRM on the new filehandle. */
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "open_confirm");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_open_confirm(&compound, stateid);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
+	   struct nfs_fh *fhandle, struct iattr *sattr, char *stateid)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	u32			bmres[2];
+
+	fattr->valid = 0;
+	nfs4_setup_compound(&compound, ops, server, "setattr");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_setattr(&compound, stateid, sattr);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+do_close(struct nfs_server *server, struct nfs_fh *fhandle, u32 seqid, char *stateid)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	
+	nfs4_setup_compound(&compound, ops, server, "close");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_close(&compound, stateid, seqid);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[4];
+	struct nfs_fsinfo	fsinfo;
+	u32			bmres[2];
+	unsigned char *		p;
+	struct qstr		q;
+	int			status;
+
+	fattr->valid = 0;
+
+	if (!(server->nfs4_state = nfs4_get_client()))
+		return -ENOMEM;
+
+	/* 
+	 * SETCLIENTID.
+	 * Until delegations are imported, we don't bother setting the program
+	 * number and port to anything meaningful.
+	 */
+	nfs4_setup_compound(&compound, ops, server, "setclientid");
+	nfs4_setup_setclientid(&compound, 0, 0);
+	if ((status = nfs4_call_compound(&compound, NULL, 0)))
+		goto out;
+
+	/*
+	 * SETCLIENTID_CONFIRM, plus root filehandle.
+	 * We also get the lease time here.
+	 */
+	nfs4_setup_compound(&compound, ops, server, "setclientid_confirm");
+	nfs4_setup_setclientid_confirm(&compound);
+	nfs4_setup_putrootfh(&compound);
+	nfs4_setup_getrootattr(&compound, fattr, &fsinfo, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	if ((status = nfs4_call_compound(&compound, NULL, 0)))
+		goto out;
+	
+	/*
+	 * Now that we have instantiated the clientid and determined
+	 * the lease time, we can initialize the renew daemon for this
+	 * server.
+	 */
+	server->lease_time = fsinfo.lease_time * HZ;
+	if ((status = nfs4_init_renewd(server)))
+		goto out;
+	
+	/*
+	 * Now we do a separate LOOKUP for each component of the mount path.
+	 * The LOOKUPs are done separately so that we can conveniently
+	 * catch an ERR_WRONGSEC if it occurs along the way...
+	 */
+	p = server->mnt_path;
+	for (;;) {
+		while (*p == '/')
+			p++;
+		if (!*p)
+			break;
+		q.name = p;
+		while (*p && (*p != '/'))
+			p++;
+		q.len = p - q.name;
+
+		nfs4_setup_compound(&compound, ops, server, "mount");
+		nfs4_setup_putfh(&compound, fhandle);
+		nfs4_setup_lookup(&compound, &q);
+		nfs4_setup_getattr(&compound, fattr, bmres);
+		nfs4_setup_getfh(&compound, fhandle);
+		status = nfs4_call_compound(&compound, NULL, 0);
+		if (!status)
+			continue;
+		if (status == -ENOENT) {
+			printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
+			printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
+		}
+		break;
+	}
+
+out:
+	return status;
+}
+
+static int
+nfs4_proc_getattr(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound compound;
+	struct nfs4_op ops[2];
+	u32 bmres[2];
+
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "getattr");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+		  struct iattr *sattr)
+{
+	struct inode *		inode = dentry->d_inode;
+	int			size_change = sattr->ia_valid & ATTR_SIZE;
+	struct nfs_fh		throwaway_fh;
+	u32			seqid;
+	nfs4_stateid		stateid;
+	int			status;
+
+	fattr->valid = 0;
+	/* A size change needs an open stateid; otherwise use the zero one. */
+	if (size_change) {
+		status = do_open(dentry->d_parent->d_inode, &dentry->d_name,
+				 NFS4_SHARE_ACCESS_WRITE, NULL, fattr,
+				 &throwaway_fh, &seqid, stateid);
+		if (status)
+			return status;
+
+		/*
+		 * Because OPEN is always done by name in nfsv4, it is
+		 * possible that we opened a different file by the same
+		 * name.  We can recognize this race, but all we can do
+		 * is CLOSE what we really opened (throwaway_fh, the file
+		 * the stateid was issued for) and return an error.
+		 * XXX: Should we compare filehandles too, as in
+		 * nfs_find_actor()?
+		 */
+		if (fattr->fileid != NFS_FILEID(inode)) {
+			printk(KERN_WARNING "nfs: raced in setattr, returning -EIO\n");
+			do_close(NFS_SERVER(inode), &throwaway_fh, seqid, stateid);
+			return -EIO;
+		}
+	}
+	else
+		memcpy(stateid, zero_stateid, sizeof(nfs4_stateid));
+
+	status = do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, stateid);
+	if (size_change)
+		do_close(NFS_SERVER(inode), NFS_FH(inode), seqid, stateid);
+	return status;
+}
+
+static int
+nfs4_proc_lookup(struct inode *dir, struct qstr *name,
+		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[5];
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2];
+	u32			bmres[2];
+	int			status;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	
+	dprintk("NFS call  lookup %s\n", name->name);
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "lookup");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	nfs4_setup_lookup(&compound, name);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	status = nfs4_call_compound(&compound, NULL, 0);
+	dprintk("NFS reply lookup: %d\n", status);
+
+	if (status >= 0)
+		status = nfs_refresh_inode(dir, &dir_attr);
+	return status;
+}
+
+static int
+nfs4_proc_access(struct inode *inode, struct rpc_cred *cred, int mode)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	struct nfs_fattr	fattr;
+	u32			bmres[2];
+	u32			req_access = 0, resp_supported, resp_access;
+	int			status;
+
+	fattr.valid = 0;
+
+	/*
+	 * Determine which access bits we want to ask for...
+	 */
+	if (mode & MAY_READ)
+		req_access |= NFS4_ACCESS_READ;
+	if (S_ISDIR(inode->i_mode)) {
+		if (mode & MAY_WRITE)
+			req_access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE;
+		if (mode & MAY_EXEC)
+			req_access |= NFS4_ACCESS_LOOKUP;
+	}
+	else {
+		if (mode & MAY_WRITE)
+			req_access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND;
+		if (mode & MAY_EXEC)
+			req_access |= NFS4_ACCESS_EXECUTE;
+	}
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "access");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_getattr(&compound, &fattr, bmres);
+	nfs4_setup_access(&compound, req_access, &resp_supported, &resp_access);
+	status = nfs4_call_compound(&compound, cred, 0);
+	nfs_refresh_inode(inode, &fattr);
+
+	if (!status) {
+		if (req_access != resp_supported) {
+			printk(KERN_NOTICE "NFS: server didn't support all access bits!\n");
+			status = -ENOTSUPP;
+		}
+		else if (req_access != resp_access)
+			status = -EACCES;
+	}
+	return status;
+}
+
+/*
+ * TODO: For the time being, we don't try to get any attributes
+ * along with any of the zero-copy operations READ, READDIR,
+ * READLINK, WRITE.
+ *
+ * In the case of the first three, we want to put the GETATTR
+ * after the read-type operation -- this is because it is hard
+ * to predict the length of a GETATTR response in v4, and thus
+ * align the READ data correctly.  This means that the GETATTR
+ * may end up partially falling into the page cache, and we should
+ * shift it into the 'tail' of the xdr_buf before processing.
+ * To do this efficiently, we need to know the total length
+ * of data received, which doesn't seem to be available outside
+ * of the RPC layer.
+ *
+ * In the case of WRITE, we also want to put the GETATTR after
+ * the operation -- in this case because we want to make sure
+ * we get the post-operation mtime and size.  This means that
+ * we can't use xdr_encode_pages() as written: we need a variant
+ * of it which would leave room in the 'tail' iovec.
+ *
+ * Both of these changes to the XDR layer would in fact be quite
+ * minor, but I decided to leave them for a subsequent patch.
+ */
+static int
+nfs4_proc_readlink(struct inode *inode, struct page *page)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "readlink");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_readlink(&compound, PAGE_CACHE_SIZE, &page);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+nfs4_proc_read(struct inode *inode, struct rpc_cred *cred,
+	       struct nfs_fattr *fattr, int flags,
+	       unsigned int base, unsigned int count,
+	       struct page *page, int *eofp)
+{
+	u64			offset = page_offset(page) + base;
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	u32			bytes_read;
+	int			status;
+
+	fattr->valid = 0;
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "read [sync]");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_read(&compound, offset, count, &page, base, eofp, &bytes_read);
+	status = nfs4_call_compound(&compound, cred, 0);
+
+	if (status >= 0)
+		status = bytes_read;
+	return status;
+}
+
+static int
+nfs4_proc_write(struct inode *inode, struct rpc_cred *cred,
+		struct nfs_fattr *fattr, int flags,
+		unsigned int base, unsigned int count,
+		struct page *page, struct nfs_writeverf *verf)
+{
+	u64			offset = page_offset(page) + base;
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	u32			bytes_written;
+	int			stable = (flags & NFS_RW_SYNC) ? NFS_FILE_SYNC : NFS_UNSTABLE;
+	int			rpcflags = (flags & NFS_RW_SWAP) ? NFS_RPC_SWAPFLAGS : 0;
+	int			status;
+
+	fattr->valid = 0;
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "write [sync]");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_write(&compound, offset, count, stable, &page, base, &bytes_written, verf);
+	status = nfs4_call_compound(&compound, cred, rpcflags);
+	
+	if (status >= 0)
+		status = bytes_written;
+	return status;
+}
+
+static int
+nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
+		 int flags, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	int			oflags;
+	u32			seqid;
+	nfs4_stateid		stateid;
+	int 			status;
+
+	oflags = NFS4_SHARE_ACCESS_READ | O_CREAT | (flags & O_EXCL);
+	status = do_open(dir, name, oflags, sattr, fattr, fhandle, &seqid, stateid);
+	if (!status) {
+		if (flags & O_EXCL)
+			status = do_setattr(NFS_SERVER(dir), fattr, fhandle, sattr, stateid);
+		do_close(NFS_SERVER(dir), fhandle, seqid, stateid);
+	}
+	return status;
+}
+
+static int
+nfs4_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2];
+	int			status;
+
+	dir_attr.valid = 0;
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "remove");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_remove(&compound, name, &dir_cinfo);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	status = nfs4_call_compound(&compound, NULL, 0);
+
+	if (!status) {
+		process_cinfo(&dir_cinfo, &dir_attr);
+		nfs_refresh_inode(dir, &dir_attr);
+	}
+	return status;
+}
+
+struct unlink_desc {
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	struct nfs4_change_info	cinfo;
+	struct nfs_fattr	attrs;
+	u32			bmres[2];	/* handed to nfs4_setup_getattr(); must outlive setup */
+};
+
+/* Build PUTFH+REMOVE+GETATTR in a heap descriptor (freed by nfs4_proc_unlink_done) and point msg at it. */
+static int
+nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
+{
+	struct unlink_desc *	up;
+	struct nfs4_compound *	cp;
+
+	up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL);
+	if (!up)
+		return -ENOMEM;
+	cp = &up->compound;
+	up->attrs.valid = 0;	/* kmalloc() does not zero; unlink_done reads attrs */
+	nfs4_setup_compound(cp, up->ops, NFS_SERVER(dir->d_inode), "unlink_setup");
+	nfs4_setup_putfh(cp, NFS_FH(dir->d_inode));
+	nfs4_setup_remove(cp, name, &up->cinfo);
+	nfs4_setup_getattr(cp, &up->attrs, up->bmres);
+	msg->rpc_proc = NFSPROC4_COMPOUND;
+	msg->rpc_argp = cp;
+	msg->rpc_resp = cp;
+	return 0;
+}
+
+static int
+nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	struct rpc_message *msg = &task->tk_msg;
+	struct unlink_desc *up;
+	
+	if (msg->rpc_argp) {
+		up = (struct unlink_desc *) msg->rpc_argp;
+		process_lease(&up->compound);
+		process_cinfo(&up->cinfo, &up->attrs);
+		nfs_refresh_inode(dir->d_inode, &up->attrs);
+		kfree(up);
+		msg->rpc_argp = NULL;
+	}
+	return 0;
+}
+
+static int
+nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		 struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	old_cinfo, new_cinfo;
+	struct nfs_fattr	old_dir_attr, new_dir_attr;
+	u32			old_dir_bmres[2], new_dir_bmres[2];
+	int			status;
+
+	old_dir_attr.valid = 0;
+	new_dir_attr.valid = 0;
+	
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(old_dir), "rename");
+	nfs4_setup_putfh(&compound, NFS_FH(old_dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_putfh(&compound, NFS_FH(new_dir));
+	nfs4_setup_rename(&compound, old_name, new_name, &old_cinfo, &new_cinfo);
+	nfs4_setup_getattr(&compound, &new_dir_attr, new_dir_bmres);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &old_dir_attr, old_dir_bmres);
+	status = nfs4_call_compound(&compound, NULL, 0);
+
+	if (!status) {
+		process_cinfo(&old_cinfo, &old_dir_attr);
+		process_cinfo(&new_cinfo, &new_dir_attr);
+		nfs_refresh_inode(old_dir, &old_dir_attr);
+		nfs_refresh_inode(new_dir, &new_dir_attr);
+	}
+	return status;
+}
+
+static int
+nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr, fattr;
+	u32			dir_bmres[2], bmres[2];
+	int			status;
+	
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+	
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "link");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_link(&compound, name, &dir_cinfo);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &fattr, bmres);
+	status = nfs4_call_compound(&compound, NULL, 0);
+
+	if (!status) {
+		process_cinfo(&dir_cinfo, &dir_attr);
+		nfs_refresh_inode(dir, &dir_attr);
+		nfs_refresh_inode(inode, &fattr);
+	}
+	return status;
+}
+
+static int
+nfs4_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
+		  struct iattr *sattr, struct nfs_fh *fhandle,
+		  struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2], bmres[2];
+	struct nfs4_change_info	dir_cinfo;
+	int			status;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "symlink");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_create_symlink(&compound, name, path, sattr, &dir_cinfo);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	status = nfs4_call_compound(&compound, NULL, 0);
+
+	if (!status) {
+		process_cinfo(&dir_cinfo, &dir_attr);
+		nfs_refresh_inode(dir, &dir_attr);
+	}
+	return status;
+}
+
+static int
+nfs4_proc_mkdir(struct inode *dir, struct qstr *name, struct iattr *sattr,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2], bmres[2];
+	struct nfs4_change_info	dir_cinfo;
+	int			status;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mkdir");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_create_dir(&compound, name, sattr, &dir_cinfo);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	status = nfs4_call_compound(&compound, NULL, 0);
+
+	if (!status) {
+		process_cinfo(&dir_cinfo, &dir_attr);
+		nfs_refresh_inode(dir, &dir_attr);
+	}
+	return status;
+}
+
+static int
+nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+                  u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct inode		*dir = dentry->d_inode;
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	int			status;
+
+	lock_kernel();
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "readdir");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_readdir(&compound, cookie, NFS_COOKIEVERF(dir), &page, count, dentry);
+	status = nfs4_call_compound(&compound, cred, 0);
+
+	unlock_kernel();
+	return status;
+}
+
+static int
+nfs4_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr,
+		dev_t rdev, struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2], bmres[2];
+	struct nfs4_change_info	dir_cinfo;
+	int			status;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mknod");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_create_special(&compound, name, rdev,sattr, &dir_cinfo);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fh);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	status = nfs4_call_compound(&compound, NULL, 0);
+
+	if (!status) {
+		process_cinfo(&dir_cinfo, &dir_attr);
+		nfs_refresh_inode(dir, &dir_attr);
+	}
+	return status;
+}
+
+static int
+nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsstat *fsstat)
+{
+	struct nfs4_compound compound;
+	struct nfs4_op ops[2];
+	u32 bmres[2];
+
+	memset(fsstat, 0, sizeof(*fsstat));
+	nfs4_setup_compound(&compound, ops, server, "statfs");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_statfs(&compound, fsstat, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsinfo *fsinfo)
+{
+	struct nfs4_compound compound;
+	struct nfs4_op ops[2];	/* one slot each for the putfh and fsinfo ops */
+	u32 bmres[2];
+
+	memset(fsinfo, 0, sizeof(*fsinfo));	/* start from an all-zero result */
+	nfs4_setup_compound(&compound, ops, server, "fsinfo");	/* tag names this call */
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_fsinfo(&compound, fsinfo, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_pathconf *pathconf)
+{
+	struct nfs4_compound compound;
+	struct nfs4_op ops[2];	/* one slot each for the putfh and pathconf ops */
+	u32 bmres[2];
+
+	memset(pathconf, 0, sizeof(*pathconf));	/* start from an all-zero result */
+	nfs4_setup_compound(&compound, ops, server, "pathconf");	/* tag names this call */
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_pathconf(&compound, pathconf, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static void
+nfs4_read_done(struct rpc_task *task)
+{
+	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+
+	process_lease(&data->u.v4.compound);
+	nfs_readpage_result(task, data->u.v4.res_count, data->u.v4.res_eof);
+}
+
+static void
+nfs4_proc_read_setup(struct nfs_read_data *data, unsigned int count)
+{
+	struct rpc_task	*task = &data->task;
+	struct nfs4_compound *cp = &data->u.v4.compound;
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = data->cred,
+	};
+	struct inode *inode = data->inode;
+	struct nfs_page *req = nfs_list_entry(data->pages.next);
+	int flags;
+
+	nfs4_setup_compound(cp, data->u.v4.ops, NFS_SERVER(inode), "read [async]");
+	nfs4_setup_putfh(cp, NFS_FH(inode));
+	nfs4_setup_read(cp, req_offset(req) + req->wb_offset,
+			count, data->pagevec, req->wb_offset,
+			&data->u.v4.res_eof,
+			&data->u.v4.res_count);
+
+	/* N.B. Do we need to test? Never called for swapfile inode */
+	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags);
+	task->tk_calldata = data;
+	/* Release requests */
+	task->tk_release = nfs_readdata_release;
+
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs4_write_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	
+	process_lease(&data->u.v4.compound);
+	nfs_writeback_done(task, data->u.v4.arg_stable,
+			   data->u.v4.arg_count, data->u.v4.res_count);
+}
+
+static void
+nfs4_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how)
+{
+	struct rpc_task	*task = &data->task;
+	struct nfs4_compound *cp = &data->u.v4.compound;
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = data->cred,
+	};
+	struct inode *inode = data->inode;
+	struct nfs_page *req = nfs_list_entry(data->pages.next);
+	int stable;
+	int flags;
+	
+	if (how & FLUSH_STABLE) {
+		if (!NFS_I(inode)->ncommit)
+			stable = NFS_FILE_SYNC;
+		else
+			stable = NFS_DATA_SYNC;
+	} else
+		stable = NFS_UNSTABLE;
+
+	nfs4_setup_compound(cp, data->u.v4.ops, NFS_SERVER(inode), "write [async]");
+	nfs4_setup_putfh(cp, NFS_FH(inode));
+	nfs4_setup_write(cp, req_offset(req) + req->wb_offset,
+			 count, stable, data->pagevec, req->wb_offset,
+			 &data->u.v4.res_count, &data->verf);
+
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags);
+	task->tk_calldata = data;
+	/* Release requests */
+	task->tk_release = nfs_writedata_release;
+
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs4_commit_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	
+	process_lease(&data->u.v4.compound);
+	nfs_commit_done(task);
+}
+
+static void
+nfs4_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how)
+{
+	struct rpc_task	*task = &data->task;
+	struct nfs4_compound *cp = &data->u.v4.compound;
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = data->cred,
+	};	
+	struct inode *inode = data->inode;
+	int flags;
+	
+	nfs4_setup_compound(cp, data->u.v4.ops, NFS_SERVER(inode), "commit [async]");
+	nfs4_setup_putfh(cp, NFS_FH(inode));
+	nfs4_setup_commit(cp, start, len, &data->verf);
+	
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags);
+	task->tk_calldata = data;
+	/* Release requests */
+	task->tk_release = nfs_writedata_release;
+	
+	rpc_call_setup(task, &msg, 0);	
+}
+
+/*
+ * nfs4_proc_renew(): This is not one of the nfs_rpc_ops; it is a special
+ * standalone procedure for queueing an asynchronous RENEW.
+ */
+struct renew_desc {
+	struct rpc_task		task;
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[1];
+};
+
+static void
+renew_done(struct rpc_task *task)
+{
+	struct nfs4_compound *cp = (struct nfs4_compound *) task->tk_msg.rpc_argp;
+	process_lease(cp);
+}
+
+static void
+renew_release(struct rpc_task *task)
+{
+	kfree(task->tk_calldata);
+	task->tk_calldata = NULL;
+}
+
+int
+nfs4_proc_renew(struct nfs_server *server)
+{
+	struct renew_desc *rp;
+	struct rpc_task *task;
+	struct nfs4_compound *cp;
+	struct rpc_message msg;
+
+	rp = (struct renew_desc *) kmalloc(sizeof(*rp), GFP_KERNEL);
+	if (!rp)
+		return -ENOMEM;
+	cp = &rp->compound;
+	task = &rp->task;
+	
+	nfs4_setup_compound(cp, rp->ops, server, "renew");
+	nfs4_setup_renew(cp);
+	
+	msg.rpc_proc = NFSPROC4_COMPOUND;
+	msg.rpc_argp = cp;
+	msg.rpc_resp = cp;
+	msg.rpc_cred = NULL;
+	rpc_init_task(task, server->client, renew_done, RPC_TASK_ASYNC);
+	rpc_call_setup(task, &msg, 0);
+	task->tk_calldata = rp;
+	task->tk_release = renew_release;
+	
+	return rpc_execute(task);
+}
+
+struct nfs_rpc_ops	nfs_v4_clientops = {
+	.version	= 4,			/* protocol version */
+	.getroot	= nfs4_proc_get_root,
+	.getattr	= nfs4_proc_getattr,
+	.setattr	= nfs4_proc_setattr,
+	.lookup		= nfs4_proc_lookup,
+	.access		= nfs4_proc_access,
+	.readlink	= nfs4_proc_readlink,
+	.read		= nfs4_proc_read,
+	.write		= nfs4_proc_write,
+	.commit		= NULL,
+	.create		= nfs4_proc_create,
+	.remove		= nfs4_proc_remove,
+	.unlink_setup	= nfs4_proc_unlink_setup,
+	.unlink_done	= nfs4_proc_unlink_done,
+	.rename		= nfs4_proc_rename,
+	.link		= nfs4_proc_link,
+	.symlink	= nfs4_proc_symlink,
+	.mkdir		= nfs4_proc_mkdir,
+	.rmdir		= nfs4_proc_remove,
+	.readdir	= nfs4_proc_readdir,
+	.mknod		= nfs4_proc_mknod,
+	.statfs		= nfs4_proc_statfs,
+	.fsinfo		= nfs4_proc_fsinfo,
+	.pathconf	= nfs4_proc_pathconf,
+	.decode_dirent	= nfs4_decode_dirent,
+	.read_setup	= nfs4_proc_read_setup,
+	.write_setup	= nfs4_proc_write_setup,
+	.commit_setup	= nfs4_proc_commit_setup,
+};
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
new file mode 100644
index 000000000000..4ba871885dbc
--- /dev/null
+++ b/fs/nfs/nfs4renewd.c
@@ -0,0 +1,110 @@
+/*
+ *  fs/nfs/nfs4renewd.c
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 "renew daemon", which wakes up periodically to
+ * send a RENEW, to keep state alive on the server.  The daemon is implemented
+ * as an rpc_task, not a real kernel thread, so it always runs in rpciod's
+ * context.  There is one renewd per nfs_server.
+ *
+ * TODO: If the send queue gets backlogged (e.g., if the server goes down),
+ * we will keep filling the queue with periodic RENEW requests.  We need a
+ * mechanism for ensuring that if renewd successfully sends off a request,
+ * then it only wakes up when the request is finished.  Maybe use the
+ * child task framework of the RPC layer?
+ */
+
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+
+static RPC_WAITQ(nfs4_renewd_queue, "nfs4_renewd_queue");
+
+static void
+renewd(struct rpc_task *task)
+{
+	struct nfs_server *server = (struct nfs_server *)task->tk_calldata;
+	unsigned long lease = server->lease_time;
+	unsigned long last = server->last_renewal;
+	unsigned long now = jiffies;	/* one snapshot for both test and arithmetic */
+	unsigned long timeout;
+
+	if (!server->nfs4_state)
+		timeout = (2 * lease) / 3;
+	else if (time_before(now, last + lease/3))	/* wrap-safe jiffies compare */
+		timeout = (2 * lease) / 3 + last - now;
+	else {
+		/* Queue an asynchronous RENEW. */
+		nfs4_proc_renew(server);
+		timeout = (2 * lease) / 3;
+	}
+
+	if (timeout < 5 * HZ)    /* safeguard */
+		timeout = 5 * HZ;
+	task->tk_timeout = timeout;
+	task->tk_action = renewd;	/* re-arm: run this function again on wakeup */
+	task->tk_exit = NULL;
+	rpc_sleep_on(&nfs4_renewd_queue, task, NULL, NULL);
+}
+
+/* Create the per-server renewd task and start it; returns -ENOMEM when no
+ * task could be allocated, otherwise the value of rpc_execute(). */
+int
+nfs4_init_renewd(struct nfs_server *server)
+{
+	struct rpc_task *renew_task;
+	int err = -ENOMEM;	/* result if the task cannot be allocated */
+
+	lock_kernel();
+	renew_task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC);
+	if (renew_task != NULL) {
+		/* renewd() finds its nfs_server through tk_calldata. */
+		renew_task->tk_calldata = server;
+		renew_task->tk_action = renewd;
+		err = rpc_execute(renew_task);
+	}
+	unlock_kernel();
+	return err;
+}
+
+/*
+ * Local variables:
+ *   c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
new file mode 100644
index 000000000000..ecbc54fb1048
--- /dev/null
+++ b/fs/nfs/nfs4state.c
@@ -0,0 +1,81 @@
+/*
+ *  fs/nfs/nfs4state.c
+ *
+ *  Client-side state management for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 state model.  For the time being,
+ * this is minimal, but will be made much more complex in a
+ * subsequent patch.
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/nfs_fs.h>
+
+/*
+ * nfs4_get_client(): returns an empty client structure
+ * nfs4_put_client(): drops reference to client structure
+ *
+ * Since these are allocated/deallocated very rarely, we don't
+ * bother putting them in a slab cache...
+ */
+struct nfs4_client *
+nfs4_get_client(void)
+{
+	struct nfs4_client *client = kmalloc(sizeof(*client), GFP_KERNEL);
+
+	if (client == NULL)
+		return NULL;	/* allocation failed: caller sees NULL */
+	atomic_set(&client->cl_count, 1);	/* reference count starts at one */
+	client->cl_clientid = 0;
+	INIT_LIST_HEAD(&client->cl_lockowners);
+	return client;
+}
+
+void
+nfs4_put_client(struct nfs4_client *clp)
+{
+	BUG_ON(clp == NULL);
+	BUG_ON(atomic_read(&clp->cl_count) == 0);
+
+	if (!atomic_dec_and_test(&clp->cl_count))
+		return;		/* other references remain */
+	BUG_ON(!list_empty(&clp->cl_lockowners));	/* list must be empty at free time */
+	kfree(clp);
+}
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
new file mode 100644
index 000000000000..edbf0e2a02d7
--- /dev/null
+++ b/fs/nfs/nfs4xdr.c
@@ -0,0 +1,1777 @@
+/*
+ *  fs/nfs/nfs4xdr.c
+ *
+ *  Client-side XDR for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/kdev_t.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+
+/* Empirically, it seems that the NFS client gets confused if
+ * cookies larger than this are returned -- presumably a
+ * signedness issue?
+ */
+#define COOKIE_MAX		0x7fffffff
+
+#define NFS4_CLIENTID(server)	((server)->nfs4_state->cl_clientid)
+
+#define NFSDBG_FACILITY		NFSDBG_XDR
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO		EIO
+
+extern int			nfs_stat_to_errno(int);
+
+#define NFS4_enc_void_sz	0
+#define NFS4_dec_void_sz	0
+#define NFS4_enc_compound_sz	1024  /* XXX: large enough? */
+#define NFS4_dec_compound_sz	1024  /* XXX: large enough? */
+
+static struct {
+	unsigned int	mode;
+	unsigned int	nfs2type;
+} nfs_type2fmt[] = {
+	{ 0,		NFNON	     },
+	{ S_IFREG,	NFREG	     },
+	{ S_IFDIR,	NFDIR	     },
+	{ S_IFBLK,	NFBLK	     },
+	{ S_IFCHR,	NFCHR	     },
+	{ S_IFLNK,	NFLNK	     },
+	{ S_IFSOCK,	NFSOCK	     },
+	{ S_IFIFO,	NFFIFO	     },
+	{ 0,		NFNON	     },
+	{ 0,		NFNON	     },
+};
+
+/*
+ * START OF "GENERIC" ENCODE ROUTINES.
+ *   These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+#define ENCODE_HEAD						\
+	u32 *p;
+#define ENCODE_TAIL						\
+	return 0
+
+#define WRITE32(n)               *p++ = htonl(n)
+#define WRITE64(n)               do {				\
+	*p++ = htonl((u32)((n) >> 32));				\
+	*p++ = htonl((u32)(n));					\
+} while (0)
+#define WRITEMEM(ptr,nbytes)     do {				\
+	p = xdr_writemem(p, ptr, nbytes);			\
+} while (0)
+
+#define RESERVE_SPACE(nbytes)	do { BUG_ON(cp->p + XDR_QUADLEN(nbytes) > cp->end); p = cp->p; } while (0)
+#define ADJUST_ARGS()           cp->p = p
+
+static inline
+u32 *xdr_writemem(u32 *p, const void *ptr, int nbytes)
+{
+	int nwords = XDR_QUADLEN(nbytes);
+	if (nwords == 0)
+		return p;	/* nothing to copy */
+	p[nwords - 1] = 0;	/* pre-zero the final word: pad bytes end up 0 */
+	memcpy(p, ptr, nbytes);
+	return &p[nwords];
+}
+
+/*
+ * FIXME: The following dummy entries will be replaced once the userland
+ * upcall gets in...
+ */
+static int
+encode_uid(char *p, uid_t uid)
+{
+	strcpy(p, "nobody");
+	return 6;
+}
+
+/*
+ * FIXME: The following dummy entries will be replaced once the userland
+ * upcall gets in...
+ */
+static int
+encode_gid(char *p, gid_t gid)
+{
+	strcpy(p, "nobody");
+	return 6;
+}
+
+/* Encode the attribute bitmap and values selected by iap->ia_valid into cp.
+ * Returns 0, or the negative value reported by encode_uid()/encode_gid(). */
+static int
+encode_attrs(struct nfs4_compound *cp, struct iattr *iap)
+{
+	char owner_name[256];
+	char owner_group[256];
+	int owner_namelen = 0;
+	int owner_grouplen = 0;
+	u32 *q;
+	int len, status;
+	u32 bmval0 = 0, bmval1 = 0;
+	ENCODE_HEAD;
+
+	/*
+	 * We reserve enough space to write the entire attribute buffer at once.
+	 * The fixed-size worst case is
+	 *   4(bitmap len) + 8(bitmap) + 4(attrlen) + 8(size) + 4(mode)
+	 *     + 16(atime) + 16(mtime) = 60 bytes; each owner string then adds
+	 *     its 4-byte length word plus the name padded to a 4-byte multiple.
+	 */
+	len = 60;
+
+	/* Resolve owner/group names first so their sizes can be reserved. */
+	if (iap->ia_valid & ATTR_UID) {
+		status = owner_namelen = encode_uid(owner_name, iap->ia_uid);
+		if (status < 0) {
+			printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
+			       iap->ia_uid);
+			goto out;
+		}
+		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
+	}
+	if (iap->ia_valid & ATTR_GID) {
+		status = owner_grouplen = encode_gid(owner_group, iap->ia_gid);
+		if (status < 0) {
+			printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
+			       iap->ia_gid);
+			goto out;
+		}
+		len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
+	}
+	RESERVE_SPACE(len);
+
+	/*
+	 * We write the bitmap length now, but leave the bitmap and the attribute
+	 * buffer length to be backfilled at the end of this routine.
+	 */
+	WRITE32(2);
+	q = p;
+	p += 3;
+
+	if (iap->ia_valid & ATTR_SIZE) {
+		bmval0 |= FATTR4_WORD0_SIZE;
+		WRITE64(iap->ia_size);
+	}
+	if (iap->ia_valid & ATTR_MODE) {
+		bmval1 |= FATTR4_WORD1_MODE;
+		WRITE32(iap->ia_mode);
+	}
+	if (iap->ia_valid & ATTR_UID) {
+		bmval1 |= FATTR4_WORD1_OWNER;
+		WRITE32(owner_namelen);
+		/* WRITEMEM advances p past the padded string by itself. */
+		WRITEMEM(owner_name, owner_namelen);
+	}
+	if (iap->ia_valid & ATTR_GID) {
+		bmval1 |= FATTR4_WORD1_OWNER_GROUP;
+		WRITE32(owner_grouplen);
+		/* WRITEMEM advances p past the padded string by itself. */
+		WRITEMEM(owner_group, owner_grouplen);
+	}
+	if (iap->ia_valid & ATTR_ATIME_SET) {
+		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
+		WRITE32(NFS4_SET_TO_CLIENT_TIME);
+		WRITE32(0);
+		WRITE32(iap->ia_atime);
+		WRITE32(0);
+	}
+	else if (iap->ia_valid & ATTR_ATIME) {
+		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
+		WRITE32(NFS4_SET_TO_SERVER_TIME);
+	}
+	if (iap->ia_valid & ATTR_MTIME_SET) {
+		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
+		WRITE32(NFS4_SET_TO_CLIENT_TIME);
+		WRITE32(0);
+		WRITE32(iap->ia_mtime);
+		WRITE32(0);
+	}
+	else if (iap->ia_valid & ATTR_MTIME) {
+		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
+		WRITE32(NFS4_SET_TO_SERVER_TIME);
+	}
+	
+	ADJUST_ARGS();
+	
+	/*
+	 * Now we backfill the bitmap and the attribute buffer length.
+	 */
+	len = (char *)p - (char *)q - 12;
+	*q++ = htonl(bmval0);
+	*q++ = htonl(bmval1);
+	*q++ = htonl(len);
+
+	status = 0;
+out:
+	return status;
+}
+
+static int
+encode_access(struct nfs4_compound *cp, struct nfs4_access *access)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(8);
+	WRITE32(OP_ACCESS);
+	WRITE32(access->ac_req_access);
+	ADJUST_ARGS();
+	
+	ENCODE_TAIL;
+}
+
+static int
+encode_close(struct nfs4_compound *cp, struct nfs4_close *close)
+{
+	ENCODE_HEAD;
+
+	/* opcode + seqid + stateid: size the stateid from its type, not by hand */
+	RESERVE_SPACE(8 + sizeof(nfs4_stateid));
+	WRITE32(OP_CLOSE);
+	WRITE32(close->cl_seqid);
+	WRITEMEM(close->cl_stateid, sizeof(nfs4_stateid));
+	ADJUST_ARGS();
+	ENCODE_TAIL;
+}
+
+static int
+encode_commit(struct nfs4_compound *cp, struct nfs4_commit *commit)
+{
+        ENCODE_HEAD;
+        
+        RESERVE_SPACE(16);
+        WRITE32(OP_COMMIT);
+        WRITE64(commit->co_start);
+        WRITE32(commit->co_len);
+        ADJUST_ARGS();
+
+        ENCODE_TAIL;
+}
+
+static int
+encode_create(struct nfs4_compound *cp, struct nfs4_create *create)
+{
+	ENCODE_HEAD;
+	
+	RESERVE_SPACE(8);
+	WRITE32(OP_CREATE);
+	WRITE32(create->cr_ftype);
+	ADJUST_ARGS();
+
+	switch (create->cr_ftype) {
+	case NF4LNK:
+		RESERVE_SPACE(4 + create->cr_textlen);
+		WRITE32(create->cr_textlen);
+		WRITEMEM(create->cr_text, create->cr_textlen);
+		ADJUST_ARGS();
+		break;
+
+	case NF4BLK: case NF4CHR:
+		RESERVE_SPACE(8);
+		WRITE32(create->cr_specdata1);
+		WRITE32(create->cr_specdata2);
+		ADJUST_ARGS();
+		break;
+
+	default:
+		break;
+	}
+
+	RESERVE_SPACE(4 + create->cr_namelen);
+	WRITE32(create->cr_namelen);
+	WRITEMEM(create->cr_name, create->cr_namelen);
+	ADJUST_ARGS();
+
+	return encode_attrs(cp, create->cr_attrs);
+}
+
+static int
+encode_getattr(struct nfs4_compound *cp, struct nfs4_getattr *getattr)
+{
+        ENCODE_HEAD;
+
+        RESERVE_SPACE(16);
+        WRITE32(OP_GETATTR);
+        WRITE32(2);
+        WRITE32(getattr->gt_bmval[0]);
+        WRITE32(getattr->gt_bmval[1]);
+        ADJUST_ARGS();
+
+        ENCODE_TAIL;
+}
+
+static int
+encode_getfh(struct nfs4_compound *cp)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_GETFH);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_link(struct nfs4_compound *cp, struct nfs4_link *link)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(8 + link->ln_namelen);
+	WRITE32(OP_LINK);
+	WRITE32(link->ln_namelen);
+	WRITEMEM(link->ln_name, link->ln_namelen);
+	ADJUST_ARGS();
+	
+	ENCODE_TAIL;
+}
+
+static int
+encode_lookup(struct nfs4_compound *cp, struct nfs4_lookup *lookup)
+{
+	int len = lookup->lo_name->len;
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(8 + len);
+	WRITE32(OP_LOOKUP);
+	WRITE32(len);
+	WRITEMEM(lookup->lo_name->name, len);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_open(struct nfs4_compound *cp, struct nfs4_open *open)
+{
+	static int global_id = 0;
+	int id = global_id++;
+	int status;
+	ENCODE_HEAD;
+	
+	/* seqid, share_access, share_deny, clientid, ownerlen, owner, opentype */
+	RESERVE_SPACE(52);
+	WRITE32(OP_OPEN);
+	WRITE32(0);                       /* seqid */
+	WRITE32(open->op_share_access);
+	WRITE32(0);                       /* for us, share_deny== 0 always */
+	WRITE64(NFS4_CLIENTID(cp->server));
+	WRITE32(4);
+	WRITE32(id);
+	WRITE32(open->op_opentype);
+	ADJUST_ARGS();
+	
+	if (open->op_opentype == NFS4_OPEN_CREATE) {
+		if (open->op_createmode == NFS4_CREATE_EXCLUSIVE) {
+			RESERVE_SPACE(12);
+			WRITE32(open->op_createmode);
+			WRITEMEM(open->op_verifier, sizeof(nfs4_verifier));
+			ADJUST_ARGS();
+		}
+		else if (open->op_attrs) {
+			RESERVE_SPACE(4);
+			WRITE32(open->op_createmode);
+			ADJUST_ARGS();
+			if ((status = encode_attrs(cp, open->op_attrs)))
+				return status;
+		}
+		else {
+			RESERVE_SPACE(12);
+			WRITE32(open->op_createmode);
+			WRITE32(0);
+			WRITE32(0);
+			ADJUST_ARGS();
+		}
+	}
+
+	RESERVE_SPACE(8 + open->op_name->len);
+	WRITE32(NFS4_OPEN_CLAIM_NULL);
+	WRITE32(open->op_name->len);
+	WRITEMEM(open->op_name->name, open->op_name->len);
+	ADJUST_ARGS();
+	
+	ENCODE_TAIL;
+}
+
+static int
+encode_open_confirm(struct nfs4_compound *cp, struct nfs4_open_confirm *open_confirm)
+{
+	ENCODE_HEAD;
+
+	/*
+	 * Note: In this "stateless" implementation, the OPEN_CONFIRM
+	 * seqid is always equal to 1.
+	 */
+	RESERVE_SPACE(24);
+	WRITE32(OP_OPEN_CONFIRM);
+	WRITEMEM(open_confirm->oc_stateid, sizeof(nfs4_stateid));
+	WRITE32(1);
+	ADJUST_ARGS();
+	
+	ENCODE_TAIL;
+}
+
+static int
+encode_putfh(struct nfs4_compound *cp, struct nfs4_putfh *putfh)
+{
+	int len = putfh->pf_fhandle->size;
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(8 + len);
+	WRITE32(OP_PUTFH);
+	WRITE32(len);
+	WRITEMEM(putfh->pf_fhandle->data, len);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_putrootfh(struct nfs4_compound *cp)
+{
+        ENCODE_HEAD;
+        
+        RESERVE_SPACE(4);
+        WRITE32(OP_PUTROOTFH);
+        ADJUST_ARGS();
+
+        ENCODE_TAIL;
+}
+
+static int
+encode_read(struct nfs4_compound *cp, struct nfs4_read *read, struct rpc_rqst *req)
+{
+	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	int		replen;
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(32);
+	WRITE32(OP_READ);
+	WRITE32(0);   /* all-zero stateid! */
+	WRITE32(0);
+	WRITE32(0);
+	WRITE32(0);
+	WRITE64(read->rd_offset);
+	WRITE32(read->rd_length);
+	ADJUST_ARGS();
+
+	/* set up reply iovec
+	 *    toplevel status + taglen + rescount + OP_PUTFH + status
+	 *       + OP_READ + status + eof + datalen = 9
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 9 + XDR_QUADLEN(cp->taglen)) << 2;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	xdr_inline_pages(&req->rq_rcv_buf, replen,
+			 read->rd_pages, read->rd_pgbase, read->rd_length);
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_readdir(struct nfs4_compound *cp, struct nfs4_readdir *readdir, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	int replen;
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(40);
+	WRITE32(OP_READDIR);
+	WRITE64(readdir->rd_cookie);
+	WRITEMEM(readdir->rd_req_verifier, sizeof(nfs4_verifier));
+	WRITE32(readdir->rd_count >> 5);  /* meaningless "dircount" field */
+	WRITE32(readdir->rd_count);	/* "maxcount": same size as the inlined page area below */
+	WRITE32(2);			/* attribute bitmap length: the two words that follow */
+	WRITE32(readdir->rd_bmval[0]);
+	WRITE32(readdir->rd_bmval[1]);
+	ADJUST_ARGS();
+
+	/* set up reply iovec (counts are 32-bit words; "<< 2" converts to bytes)
+	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
+	 *      + OP_READDIR + status + verifier(2)  = 9
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 9 + XDR_QUADLEN(cp->taglen)) << 2;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->rd_pages,
+			 readdir->rd_pgbase, readdir->rd_count);
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_readlink(struct nfs4_compound *cp, struct nfs4_readlink *readlink, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	int replen;
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_READLINK);
+	ADJUST_ARGS();
+
+	/* set up reply iovec
+	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
+	 *      + OP_READLINK + status  = 7
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 7 + XDR_QUADLEN(cp->taglen)) << 2;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->rl_pages, 0, readlink->rl_count);
+	
+	ENCODE_TAIL;
+}
+
+static int
+encode_remove(struct nfs4_compound *cp, struct nfs4_remove *remove)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(8 + remove->rm_namelen);
+	WRITE32(OP_REMOVE);
+	WRITE32(remove->rm_namelen);
+	WRITEMEM(remove->rm_name, remove->rm_namelen);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_rename(struct nfs4_compound *cp, struct nfs4_rename *rename)
+{
+	ENCODE_HEAD;
+	/* RENAME opcode, then the old name as a length word followed by its bytes */
+	RESERVE_SPACE(8 + rename->rn_oldnamelen);
+	WRITE32(OP_RENAME);
+	WRITE32(rename->rn_oldnamelen);
+	WRITEMEM(rename->rn_oldname, rename->rn_oldnamelen);
+	ADJUST_ARGS();
+	/* the new name is preceded only by its length word: 4 bytes of overhead, not 8 */
+	RESERVE_SPACE(4 + rename->rn_newnamelen);
+	WRITE32(rename->rn_newnamelen);
+	WRITEMEM(rename->rn_newname, rename->rn_newnamelen);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_renew(struct nfs4_compound *cp)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(12);
+	WRITE32(OP_RENEW);
+	WRITE64(NFS4_CLIENTID(cp->server));
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_restorefh(struct nfs4_compound *cp)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_RESTOREFH);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_savefh(struct nfs4_compound *cp)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_SAVEFH);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_setattr(struct nfs4_compound *cp, struct nfs4_setattr *setattr)
+{
+	int status;
+	ENCODE_HEAD;
+	
+        RESERVE_SPACE(20);
+        WRITE32(OP_SETATTR);
+	WRITEMEM(setattr->st_stateid, sizeof(nfs4_stateid));
+        ADJUST_ARGS();
+
+        if ((status = encode_attrs(cp, setattr->st_iap)))
+		return status;
+
+        ENCODE_TAIL;
+}
+
+static int
+encode_setclientid(struct nfs4_compound *cp, struct nfs4_setclientid *setclientid)
+{
+	u32 total_len;
+	u32 len1, len2, len3;
+	ENCODE_HEAD;
+
+	len1 = strlen(setclientid->sc_name);
+	len2 = strlen(setclientid->sc_netid);
+	len3 = strlen(setclientid->sc_uaddr);
+	total_len = XDR_QUADLEN(len1) + XDR_QUADLEN(len2) + XDR_QUADLEN(len3);
+	total_len = (total_len << 2) + 32;
+
+	RESERVE_SPACE(total_len);
+	WRITE32(OP_SETCLIENTID);
+	WRITEMEM(setclientid->sc_verifier, sizeof(nfs4_verifier));
+	WRITE32(len1);
+	WRITEMEM(setclientid->sc_name, len1);
+	WRITE32(setclientid->sc_prog);
+	WRITE32(len2);
+	WRITEMEM(setclientid->sc_netid, len2);
+	WRITE32(len3);
+	WRITEMEM(setclientid->sc_uaddr, len3);
+	WRITE32(setclientid->sc_cb_ident);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_setclientid_confirm(struct nfs4_compound *cp)
+{
+        ENCODE_HEAD;
+
+        RESERVE_SPACE(12 + sizeof(nfs4_verifier));
+        WRITE32(OP_SETCLIENTID_CONFIRM);
+        WRITE64(cp->server->nfs4_state->cl_clientid);
+        WRITEMEM(cp->server->nfs4_state->cl_confirm,sizeof(nfs4_verifier));
+        ADJUST_ARGS();
+
+        ENCODE_TAIL;
+}
+
+static int
+encode_write(struct nfs4_compound *cp, struct nfs4_write *write, struct rpc_rqst *req)
+{
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(36);
+	WRITE32(OP_WRITE);
+	WRITE32(0xffffffff);     /* magic stateid -1 */
+	WRITE32(0xffffffff);
+	WRITE32(0xffffffff);
+	WRITE32(0xffffffff);
+	WRITE64(write->wr_offset);
+	WRITE32(write->wr_stable_how);
+	WRITE32(write->wr_len);
+	ADJUST_ARGS();
+
+	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+	xdr_encode_pages(sndbuf, write->wr_pages, write->wr_pgbase, write->wr_len);
+
+	ENCODE_TAIL;
+}
+
+static int
+encode_compound(struct nfs4_compound *cp, struct rpc_rqst *req)
+{
+	int i, status = 0;
+	ENCODE_HEAD;
+
+	dprintk("encode_compound: tag=%.*s\n", (int)cp->taglen, cp->tag);
+	
+	RESERVE_SPACE(12 + cp->taglen);
+	WRITE32(cp->taglen);
+	WRITEMEM(cp->tag, cp->taglen);
+	WRITE32(NFS4_MINOR_VERSION);
+	WRITE32(cp->req_nops);
+	ADJUST_ARGS();
+
+	for (i = 0; i < cp->req_nops; i++) {
+		switch (cp->ops[i].opnum) {
+		case OP_ACCESS:
+			status = encode_access(cp, &cp->ops[i].u.access);
+			break;
+		case OP_CLOSE:
+			status = encode_close(cp, &cp->ops[i].u.close);
+			break;
+		case OP_COMMIT:
+			status = encode_commit(cp, &cp->ops[i].u.commit);
+			break;
+		case OP_CREATE:
+			status = encode_create(cp, &cp->ops[i].u.create);
+			break;
+		case OP_GETATTR:
+			status = encode_getattr(cp, &cp->ops[i].u.getattr);
+			break;
+		case OP_GETFH:
+			status = encode_getfh(cp);
+			break;
+		case OP_LINK:
+			status = encode_link(cp, &cp->ops[i].u.link);
+			break;
+		case OP_LOOKUP:
+			status = encode_lookup(cp, &cp->ops[i].u.lookup);
+			break;
+		case OP_OPEN:
+			status = encode_open(cp, &cp->ops[i].u.open);
+			break;
+		case OP_OPEN_CONFIRM:
+			status = encode_open_confirm(cp, &cp->ops[i].u.open_confirm);
+			break;
+		case OP_PUTFH:
+			status = encode_putfh(cp, &cp->ops[i].u.putfh);
+			break;
+		case OP_PUTROOTFH:
+			status = encode_putrootfh(cp);
+			break;
+		case OP_READ:
+			status = encode_read(cp, &cp->ops[i].u.read, req);
+			break;
+		case OP_READDIR:
+			status = encode_readdir(cp, &cp->ops[i].u.readdir, req);
+			break;
+		case OP_READLINK:
+			status = encode_readlink(cp, &cp->ops[i].u.readlink, req);
+			break;
+		case OP_REMOVE:
+			status = encode_remove(cp, &cp->ops[i].u.remove);
+			break;
+		case OP_RENAME:
+			status = encode_rename(cp, &cp->ops[i].u.rename);
+			break;
+		case OP_RENEW:
+			status = encode_renew(cp);
+			break;
+		case OP_RESTOREFH:
+			status = encode_restorefh(cp);
+			break;
+		case OP_SAVEFH:
+			status = encode_savefh(cp);
+			break;
+		case OP_SETATTR:
+			status = encode_setattr(cp, &cp->ops[i].u.setattr);
+			break;
+		case OP_SETCLIENTID:
+			status = encode_setclientid(cp, &cp->ops[i].u.setclientid);
+			break;
+		case OP_SETCLIENTID_CONFIRM:
+			status = encode_setclientid_confirm(cp);
+			break;
+		case OP_WRITE:
+			status = encode_write(cp, &cp->ops[i].u.write, req);
+			break;
+		default:
+			BUG();
+		}
+		if (status)
+			return status;
+	}
+	
+	ENCODE_TAIL;
+}
+/*
+ * END OF "GENERIC" ENCODE ROUTINES.
+ */
+
+
+/*
+ * Encode void argument
+ */
+static int
+nfs4_xdr_enc_void(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode COMPOUND argument
+ */
+static int
+nfs4_xdr_enc_compound(struct rpc_rqst *req, u32 *p, struct nfs4_compound *cp)
+{
+	int status;
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	
+	cp->p = p;
+	cp->end = (u32 *) ((char *)req->rq_svec[0].iov_base + req->rq_svec[0].iov_len);
+	status = encode_compound(cp, req);
+	cp->timestamp = jiffies;
+
+	if (!status && !sndbuf->page_len)
+		req->rq_slen = xdr_adjust_iovec(sndbuf->head, cp->p);
+	return status;
+}
+
+
+/*
+ * START OF "GENERIC" DECODE ROUTINES.
+ *   These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+#define DECODE_HEAD				\
+	u32 *p;					\
+	int status
+#define DECODE_TAIL				\
+	status = 0;				\
+out:						\
+	return status;				\
+xdr_error:					\
+	printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+	status = -EIO;				\
+	goto out
+
+#define READ32(x)         (x) = ntohl(*p++)
+#define READ64(x)         do {			\
+	(x) = (u64)ntohl(*p++) << 32;		\
+	(x) |= ntohl(*p++);			\
+} while (0)
+#define READTIME(x)       do {			\
+	p++;					\
+	(x) = (u64)ntohl(*p++) << 32;		\
+	(x) |= ntohl(*p++);			\
+} while (0)
+#define COPYMEM(x,nbytes) do {			\
+	memcpy((x), p, nbytes);			\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)
+
+#define READ_BUF(nbytes)  do {			\
+	if (nbytes > (u32)((char *)cp->end - (char *)cp->p))  \
+		goto xdr_error;			\
+	p = cp->p;				\
+	cp->p += XDR_QUADLEN(nbytes);		\
+} while (0)
+
+/*
+ * FIXME: The following dummy entry will be replaced once the userland
+ * upcall gets in...
+ */
+static int
+decode_uid(char *p, u32 len, uid_t *uid)
+{
+	*uid = -2;
+	return 0;
+}
+
+/*
+ * FIXME: The following dummy entry will be replaced once the userland
+ * upcall gets in...
+ */
+static int
+decode_gid(char *p, u32 len, gid_t *gid)
+{
+	*gid = -2;
+	return 0;
+}
+
+static int
+decode_change_info(struct nfs4_compound *cp, struct nfs4_change_info *cinfo)
+{
+	DECODE_HEAD;
+
+	READ_BUF(20);
+	READ32(cinfo->atomic);
+	READ64(cinfo->before);
+	READ64(cinfo->after);
+	
+	DECODE_TAIL;
+}
+
+static int
+decode_access(struct nfs4_compound *cp, int nfserr, struct nfs4_access *access)
+{
+	u32 supp, acc;
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		READ_BUF(8);
+		READ32(supp);
+		READ32(acc);
+
+		status = -EIO;
+		if ((supp & ~access->ac_req_access) || (acc & ~supp)) {
+			printk(KERN_NOTICE "NFS: server returned bad bits in access call!\n");
+			goto out;
+		}
+		*access->ac_resp_supported = supp;
+		*access->ac_resp_access = acc;
+	}
+	
+	DECODE_TAIL;
+}
+
+static int
+decode_close(struct nfs4_compound *cp, int nfserr, struct nfs4_close *close)
+{
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		READ_BUF(sizeof(nfs4_stateid));
+		COPYMEM(close->cl_stateid, sizeof(nfs4_stateid));
+	}
+	
+	DECODE_TAIL;
+}
+
+static int
+decode_commit(struct nfs4_compound *cp, int nfserr, struct nfs4_commit *commit)
+{
+        DECODE_HEAD;
+
+        if (!nfserr) {
+                READ_BUF(8);
+                COPYMEM(commit->co_verifier->verifier, 8);
+        }
+
+        DECODE_TAIL;
+}
+
+static int
+decode_create(struct nfs4_compound *cp, int nfserr, struct nfs4_create *create)
+{
+	u32 bmlen;
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		if ((status = decode_change_info(cp, create->cr_cinfo)))
+			goto out;
+		READ_BUF(4);
+		READ32(bmlen);
+		if (bmlen > 2)
+			goto xdr_error;
+		READ_BUF(bmlen << 2);
+	}
+
+	DECODE_TAIL;
+}
+
+extern u32 nfs4_fattr_bitmap[2];
+extern u32 nfs4_fsinfo_bitmap[2];
+extern u32 nfs4_fsstat_bitmap[2];
+extern u32 nfs4_pathconf_bitmap[2];
+
+static int
+decode_getattr(struct nfs4_compound *cp, int nfserr, struct nfs4_getattr *getattr)
+{
+        struct nfs_fattr *nfp = getattr->gt_attrs;
+	struct nfs_fsstat *fsstat = getattr->gt_fsstat;
+	struct nfs_fsinfo *fsinfo = getattr->gt_fsinfo;
+	struct nfs_pathconf *pathconf = getattr->gt_pathconf;
+        u32 bmlen;
+        u32 bmval0 = 0;
+        u32 bmval1 = 0;
+        u32 attrlen;
+        u32 dummy32;
+        u32 len = 0;		/* running count of attribute bytes consumed */
+	unsigned int type;
+	int fmode = 0;
+        DECODE_HEAD;
+	/* a failed GETATTR carries no attribute data: nothing to parse */
+        if (nfserr)
+                goto success;
+        
+        READ_BUF(4);
+        READ32(bmlen);
+        if (bmlen > 2)
+                goto xdr_error;
+	
+        READ_BUF((bmlen << 2) + 4);	/* bitmap words plus the attribute length word */
+        if (bmlen > 0)
+                READ32(bmval0);
+        if (bmlen > 1)
+                READ32(bmval1);
+        READ32(attrlen);
+
+	/* reject any attribute bit that was not set in the request bitmap */
+	if ((bmval0 & ~getattr->gt_bmval[0]) ||
+	    (bmval1 & ~getattr->gt_bmval[1])) {
+		dprintk("read_attrs: server returned bad attributes!\n");
+		goto xdr_error;
+	}
+	getattr->gt_bmres[0] = bmval0;
+	getattr->gt_bmres[1] = bmval1;
+
+	/*
+	 * In case the server doesn't return some attributes,
+	 * we initialize them here to some nominal values..
+	 */
+	if (nfp) {
+		nfp->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
+		nfp->nlink = 1;
+		nfp->timestamp = jiffies;
+	}
+	if (fsinfo) {
+		fsinfo->rtmult = fsinfo->wtmult = 512;  /* ??? */
+		fsinfo->lease_time = 60;
+	}
+
+        if (bmval0 & FATTR4_WORD0_TYPE) {
+                READ_BUF(4);
+                len += 4;
+                READ32(type);
+                if (type < NF4REG || type > NF4NAMEDATTR) {
+                        dprintk("read_attrs: bad type %d\n", type);
+                        goto xdr_error;
+                }
+		nfp->type = nfs_type2fmt[type].nfs2type;	/* NOTE(review): nfp was NULL-checked above but is dereferenced unguarded from here on */
+		fmode = nfs_type2fmt[type].mode;
+                dprintk("read_attrs: type=%d\n", (u32)nfp->type);
+        }
+        if (bmval0 & FATTR4_WORD0_CHANGE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->change_attr);
+                dprintk("read_attrs: changeid=%Ld\n", (u64)nfp->change_attr);
+        }
+        if (bmval0 & FATTR4_WORD0_SIZE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->size);
+                dprintk("read_attrs: size=%Ld\n", (u64)nfp->size);
+        }
+        if (bmval0 & FATTR4_WORD0_FSID) {
+                READ_BUF(16);
+                len += 16;
+                READ64(nfp->fsid_u.nfs4.major);
+                READ64(nfp->fsid_u.nfs4.minor);
+                dprintk("read_attrs: fsid=0x%Lx/0x%Lx\n",
+			nfp->fsid_u.nfs4.major, nfp->fsid_u.nfs4.minor);
+        }
+        if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
+                READ_BUF(4);
+                len += 4;
+                READ32(fsinfo->lease_time);	/* NOTE(review): fsinfo is likewise used unguarded after the NULL check above */
+                dprintk("read_attrs: lease_time=%d\n", fsinfo->lease_time);
+        }
+        if (bmval0 & FATTR4_WORD0_FILEID) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->fileid);
+                dprintk("read_attrs: fileid=%Ld\n", nfp->fileid);
+        }
+	if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
+		READ_BUF(8);
+		len += 8;
+		READ64(fsstat->afiles);	/* NOTE(review): fsstat is never NULL-checked in this function */
+		dprintk("read_attrs: files_avail=0x%Lx\n", fsstat->afiles);
+	}
+        if (bmval0 & FATTR4_WORD0_FILES_FREE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->ffiles);
+                dprintk("read_attrs: files_free=0x%Lx\n", fsstat->ffiles);
+        }
+        if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->tfiles);
+                dprintk("read_attrs: files_tot=0x%Lx\n", fsstat->tfiles);
+        }
+        if (bmval0 & FATTR4_WORD0_MAXFILESIZE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsinfo->maxfilesize);
+                dprintk("read_attrs: maxfilesize=0x%Lx\n", fsinfo->maxfilesize);
+        }
+	if (bmval0 & FATTR4_WORD0_MAXLINK) {
+		READ_BUF(4);
+		len += 4;
+		READ32(pathconf->max_link);	/* NOTE(review): pathconf is never NULL-checked in this function */
+		dprintk("read_attrs: maxlink=%d\n", pathconf->max_link);
+	}
+        if (bmval0 & FATTR4_WORD0_MAXNAME) {
+                READ_BUF(4);
+                len += 4;
+                READ32(pathconf->max_namelen);
+                dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen);
+        }
+        if (bmval0 & FATTR4_WORD0_MAXREAD) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsinfo->rtmax);
+		fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax;	/* preferred sizes simply mirror the maximum */
+                dprintk("read_attrs: maxread=%d\n", fsinfo->rtmax);
+        }
+        if (bmval0 & FATTR4_WORD0_MAXWRITE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsinfo->wtmax);
+		fsinfo->wtpref = fsinfo->wtmax;
+                dprintk("read_attrs: maxwrite=%d\n", fsinfo->wtmax);
+        }
+	
+        if (bmval1 & FATTR4_WORD1_MODE) {
+                READ_BUF(4);
+                len += 4;
+                READ32(dummy32);
+		nfp->mode = (dummy32 & ~S_IFMT) | fmode;	/* file-type bits come from the TYPE attribute (0 if it was not returned) */
+                dprintk("read_attrs: mode=0%o\n", nfp->mode);
+        }
+        if (bmval1 & FATTR4_WORD1_NUMLINKS) {
+                READ_BUF(4);
+                len += 4;
+                READ32(nfp->nlink);
+                dprintk("read_attrs: nlinks=0%o\n", nfp->nlink);	/* NOTE(review): the link count is printed in octal */
+        }
+        if (bmval1 & FATTR4_WORD1_OWNER) {
+                READ_BUF(4);
+                len += 4;
+                READ32(dummy32);    /* name length */
+                if (dummy32 > XDR_MAX_NETOBJ) {
+			dprintk("read_attrs: name too long!\n");
+                        goto xdr_error;
+                }
+                READ_BUF(dummy32);
+                len += (XDR_QUADLEN(dummy32) << 2);	/* the name occupies a whole number of 4-byte words */
+                if ((status = decode_uid((char *)p, dummy32, &nfp->uid))) {
+                        dprintk("read_attrs: gss_get_num failed!\n");
+                        goto out;
+                }
+                dprintk("read_attrs: uid=%d\n", (int)nfp->uid);
+        }
+        if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
+                READ_BUF(4);
+                len += 4;
+                READ32(dummy32);    /* group name length */
+                if (dummy32 > XDR_MAX_NETOBJ) {
+                        dprintk("read_attrs: name too long!\n");
+                        goto xdr_error;
+                }
+                READ_BUF(dummy32);
+                len += (XDR_QUADLEN(dummy32) << 2);
+                if ((status = decode_gid((char *)p, dummy32, &nfp->gid))) {
+                        dprintk("read_attrs: gss_get_num failed!\n");
+                        goto out;
+                }
+                dprintk("read_attrs: gid=%d\n", (int)nfp->gid);
+        }
+        if (bmval1 & FATTR4_WORD1_RAWDEV) {
+                READ_BUF(8);
+                len += 8;
+                READ32(dummy32);	/* first word: shifted up by MINORBITS */
+		nfp->rdev = (dummy32 << MINORBITS);
+                READ32(dummy32);	/* second word: masked with MINORMASK */
+		nfp->rdev |= (dummy32 & MINORMASK);
+                dprintk("read_attrs: rdev=%d\n", nfp->rdev);
+        }
+        if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->abytes);
+                dprintk("read_attrs: savail=0x%Lx\n", fsstat->abytes);
+        }
+	if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->fbytes);
+                dprintk("read_attrs: sfree=0x%Lx\n", fsstat->fbytes);
+        }
+        if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->tbytes);
+                dprintk("read_attrs: stotal=0x%Lx\n", fsstat->tbytes);
+        }
+        if (bmval1 & FATTR4_WORD1_SPACE_USED) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->du.nfs3.used);
+                dprintk("read_attrs: sused=0x%Lx\n", nfp->du.nfs3.used);
+        }
+        if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
+                READ_BUF(12);
+                len += 12;
+                READTIME(nfp->atime);	/* READTIME skips the first of the three words and packs the other two */
+                dprintk("read_attrs: atime=%d\n", (int)nfp->atime);
+        }
+        if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
+                READ_BUF(12);
+                len += 12;
+                READTIME(nfp->ctime);
+                dprintk("read_attrs: ctime=%d\n", (int)nfp->ctime);
+        }
+        if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
+                READ_BUF(12);
+                len += 12;
+                READTIME(nfp->mtime);
+                dprintk("read_attrs: mtime=%d\n", (int)nfp->mtime);
+        }
+        if (len != attrlen)	/* bytes consumed must equal the attribute length the server announced */
+                goto xdr_error;
+	
+success:
+        DECODE_TAIL;
+}
+
+static int
+decode_getfh(struct nfs4_compound *cp, int nfserr, struct nfs4_getfh *getfh)
+{
+	struct nfs_fh *fh = getfh->gf_fhandle;
+	u32 len;	/* unsigned, so a wire length >= 2^31 fails the size check below */
+	DECODE_HEAD;
+
+	/* Zero handle first to allow comparisons */
+	memset(fh, 0, sizeof(*fh));
+
+	if (!nfserr) {
+		READ_BUF(4);
+		READ32(len);
+		if (len > NFS_MAXFHSIZE)
+			goto xdr_error;
+		READ_BUF(len);
+		fh->size = len;	/* recorded only once the payload is known to be present */
+		COPYMEM(fh->data, len);
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+decode_link(struct nfs4_compound *cp, int nfserr, struct nfs4_link *link)
+{
+	int status = 0;
+	
+	if (!nfserr)
+		status = decode_change_info(cp, link->ln_cinfo);
+	return status;
+}
+
+static int
+decode_open(struct nfs4_compound *cp, int nfserr, struct nfs4_open *open)
+{
+	u32 bmlen, delegation_type;
+	DECODE_HEAD;
+	
+	if (!nfserr) {
+		READ_BUF(sizeof(nfs4_stateid));
+		COPYMEM(open->op_stateid, sizeof(nfs4_stateid));
+		/* a truncated change_info must abort the decode, not be silently skipped */
+		if ((status = decode_change_info(cp, open->op_cinfo)))
+			goto out;
+		READ_BUF(8);
+		READ32(*open->op_rflags);
+		READ32(bmlen);
+		if (bmlen > 10)
+			goto xdr_error;
+		
+		READ_BUF((bmlen << 2) + 4);
+		p += bmlen;	/* the bitmap words themselves are skipped, not interpreted */
+		READ32(delegation_type);
+		if (delegation_type != NFS4_OPEN_DELEGATE_NONE)
+			goto xdr_error;
+	}
+	
+	DECODE_TAIL;
+}
+
+static int
+decode_open_confirm(struct nfs4_compound *cp, int nfserr, struct nfs4_open_confirm *open_confirm)
+{
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		READ_BUF(sizeof(nfs4_stateid));
+		COPYMEM(open_confirm->oc_stateid, sizeof(nfs4_stateid));
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+decode_read(struct nfs4_compound *cp, int nfserr, struct nfs4_read *read)
+{
+	u32 throwaway;
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		READ_BUF(8);
+		if (read->rd_eof)
+			READ32(*read->rd_eof);
+		else
+			READ32(throwaway);
+		READ32(*read->rd_bytes_read);
+		if (*read->rd_bytes_read > read->rd_length)
+			goto xdr_error;
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+decode_readdir(struct nfs4_compound *cp, int nfserr, struct rpc_rqst *req, struct nfs4_readdir *readdir)
+{
+	struct xdr_buf	*rcvbuf = &req->rq_rcv_buf;
+	struct page	*page = *rcvbuf->pages;
+	unsigned int	pglen = rcvbuf->page_len;
+	u32		*end, *entry;
+	u32		len, attrlen, word;
+	int 		i;
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		READ_BUF(8);
+		COPYMEM(readdir->rd_resp_verifier, 8);
+		/* walk the entries held in the first receive page, sanity-checking each */
+		BUG_ON(pglen > PAGE_CACHE_SIZE);
+		p   = (u32 *) kmap(page);
+		end = (u32 *) ((char *)p + pglen + readdir->rd_pgbase);
+
+		while (*p++) {
+			entry = p - 1;	/* start of this entry, kept for truncation on a short packet */
+			if (p + 3 > end)
+				goto short_pkt;
+			p += 2;     /* cookie */
+			len = ntohl(*p++);  /* filename length */
+			if (len > NFS4_MAXNAMLEN) {
+				printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
+				goto err_unmap;
+			}
+			
+			p += XDR_QUADLEN(len);
+			if (p + 1 > end)
+				goto short_pkt;
+			len = ntohl(*p++);  /* bitmap length */
+			if (len > 10) {
+				printk(KERN_WARNING "NFS: giant bitmap in readdir (len 0x%x)\n", len);
+				goto err_unmap;
+			}
+			if (p + len + 1 > end)
+				goto short_pkt;
+			attrlen = 0;
+			for (i = 0; i < len; i++) {
+				word = ntohl(*p++);
+				if (!word)
+					continue;
+				else if (i == 0 && word == FATTR4_WORD0_FILEID) {
+					attrlen = 8;
+					continue;
+				}
+				printk(KERN_WARNING "NFS: unexpected bitmap word in readdir (0x%x)\n", word);
+				goto err_unmap;
+			}
+			if (ntohl(*p++) != attrlen) {
+				printk(KERN_WARNING "NFS: unexpected attrlen in readdir\n");
+				goto err_unmap;
+			}
+			p += XDR_QUADLEN(attrlen);
+			if (p + 1 > end)
+				goto short_pkt;
+		}
+		kunmap(page);
+	}
+	
+	DECODE_TAIL;
+short_pkt:
+	printk(KERN_NOTICE "NFS: short packet in readdir reply!\n");
+	/* truncate listing -- must happen before kunmap(), entry points into the mapping */
+	entry[0] = entry[1] = 0;
+	kunmap(page);
+	return 0;
+err_unmap:
+	kunmap(page);
+	return -errno_NFSERR_IO;
+}
+
+static int
+decode_readlink(struct nfs4_compound *cp, int nfserr, struct rpc_rqst *req, struct nfs4_readlink *readlink)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	u32 *strlen;	/* length word at the start of the first page (the name shadows strlen()) */
+	u32 len;
+	char *string;
+
+	if (!nfserr) {
+		/*
+		 * The XDR encode routine has set things up so that
+		 * the link text will be copied directly into the
+		 * buffer.  We just have to do overflow-checking,
+		 * and null-terminate the text (the VFS expects
+		 * null-termination).
+		 */
+		strlen = (u32 *) kmap(rcvbuf->pages[0]);
+		len = ntohl(*strlen);
+		if (len > PAGE_CACHE_SIZE - 5) {	/* 4-byte length + text + NUL must fit in one page */
+			printk(KERN_WARNING "nfs: server returned giant symlink!\n");
+			kunmap(rcvbuf->pages[0]);
+			return -EIO;
+		}
+		*strlen = len;	/* overwrite the length word with its host-byte-order value */
+		
+		string = (char *)(strlen + 1);
+		string[len] = '\0';
+		kunmap(rcvbuf->pages[0]);
+	}
+	return 0;
+}
+
+static int
+decode_remove(struct nfs4_compound *cp, int nfserr, struct nfs4_remove *remove)
+{
+	int status;
+
+	status = 0;
+	if (!nfserr) 
+		status = decode_change_info(cp, remove->rm_cinfo);
+	return status;
+}
+
+static int
+decode_rename(struct nfs4_compound *cp, int nfserr, struct nfs4_rename *rename)
+{
+	int status = 0;
+
+	if (!nfserr) {
+		if ((status = decode_change_info(cp, rename->rn_src_cinfo)))
+			goto out;
+		if ((status = decode_change_info(cp, rename->rn_dst_cinfo)))
+			goto out;
+	}
+out:
+	return status;
+}
+
+static int
+decode_setattr(struct nfs4_compound *cp)
+{
+        u32 bmlen;
+        DECODE_HEAD;
+        
+        READ_BUF(4);
+        READ32(bmlen);
+        if (bmlen > 10)
+                goto xdr_error;
+        READ_BUF(bmlen << 2);
+
+        DECODE_TAIL;
+}
+
+static int
+decode_setclientid(struct nfs4_compound *cp, int nfserr)
+{
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		READ_BUF(8 + sizeof(nfs4_verifier));
+		READ64(cp->server->nfs4_state->cl_clientid);
+		COPYMEM(cp->server->nfs4_state->cl_confirm, sizeof(nfs4_verifier));
+	}
+	else if (nfserr == NFSERR_CLID_INUSE) {
+		u32 len;
+
+		/* skip netid string */
+		READ_BUF(4);
+		READ32(len);
+		READ_BUF(len);
+
+		/* skip uaddr string */
+		READ_BUF(4);
+		READ32(len);
+		READ_BUF(len);
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+decode_write(struct nfs4_compound *cp, int nfserr, struct nfs4_write *write)
+{
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		READ_BUF(16);
+		READ32(*write->wr_bytes_written);
+		if (*write->wr_bytes_written > write->wr_len)
+			goto xdr_error;
+		READ32(write->wr_verf->committed);
+		COPYMEM(write->wr_verf->verifier, 8);
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+decode_compound(struct nfs4_compound *cp, struct rpc_rqst *req)
+{
+	u32 taglen;
+	u32 opnum, nfserr;
+	DECODE_HEAD;
+
+	READ_BUF(8);
+	READ32(cp->toplevel_status);
+	READ32(taglen);
+	/*
+	 * We need this if our zero-copy I/O is going to work.  Rumor has
+	 * it that the spec will soon mandate it...
+	 */
+	if (taglen != cp->taglen)
+		dprintk("nfs4: non-conforming server returns tag length mismatch!\n");
+	if (taglen > (u32)((char *)cp->end - (char *)cp->p))
+		goto xdr_error;	/* else "taglen + 4" below can wrap to a small value */
+	READ_BUF(taglen + 4);
+	p += XDR_QUADLEN(taglen);
+	READ32(cp->resp_nops);
+	if (cp->resp_nops > cp->req_nops) {
+		dprintk("nfs4: resp_nops > req_nops!\n");
+		goto xdr_error;
+	}
+
+	for (cp->nops = 0; cp->nops < cp->resp_nops; cp->nops++) {
+		READ_BUF(8);
+		READ32(opnum);
+		if (opnum != cp->ops[cp->nops].opnum) {
+			dprintk("nfs4: operation mismatch!\n");
+			goto xdr_error;
+		}
+		READ32(nfserr);
+		if (cp->nops == cp->resp_nops - 1) {
+			/* only the last op in the reply may carry the overall status */
+			if (nfserr != cp->toplevel_status) {
+				dprintk("nfs4: status mismatch!\n");
+				goto xdr_error;
+			}
+		}
+		else if (nfserr) {
+			dprintk("nfs4: intermediate status nonzero!\n");
+			goto xdr_error;
+		}
+		cp->ops[cp->nops].nfserr = nfserr;
+
+		switch (opnum) {
+		case OP_ACCESS:
+			status = decode_access(cp, nfserr, &cp->ops[cp->nops].u.access);
+			break;
+		case OP_CLOSE:
+			status = decode_close(cp, nfserr, &cp->ops[cp->nops].u.close);
+			break;
+		case OP_COMMIT:
+			status = decode_commit(cp, nfserr, &cp->ops[cp->nops].u.commit);
+			break;
+		case OP_CREATE:
+			status = decode_create(cp, nfserr, &cp->ops[cp->nops].u.create);
+			break;
+		case OP_GETATTR:
+			status = decode_getattr(cp, nfserr, &cp->ops[cp->nops].u.getattr);
+			break;
+		case OP_GETFH:
+			status = decode_getfh(cp, nfserr, &cp->ops[cp->nops].u.getfh);
+			break;
+		case OP_LINK:
+			status = decode_link(cp, nfserr, &cp->ops[cp->nops].u.link);
+			break;
+		case OP_LOOKUP:
+			status = 0;	/* nothing beyond opnum + status is parsed for this op */
+			break;
+		case OP_OPEN:
+			status = decode_open(cp, nfserr, &cp->ops[cp->nops].u.open);
+			break;
+		case OP_OPEN_CONFIRM:
+			status = decode_open_confirm(cp, nfserr, &cp->ops[cp->nops].u.open_confirm);
+			break;
+		case OP_PUTFH:
+			status = 0;
+			break;
+		case OP_PUTROOTFH:
+			status = 0;
+			break;
+		case OP_READ:
+			status = decode_read(cp, nfserr, &cp->ops[cp->nops].u.read);
+			break;
+		case OP_READDIR:
+			status = decode_readdir(cp, nfserr, req, &cp->ops[cp->nops].u.readdir);
+			break;
+		case OP_READLINK:
+			status = decode_readlink(cp, nfserr, req, &cp->ops[cp->nops].u.readlink);
+			break;
+		case OP_RESTOREFH:
+			status = 0;
+			break;
+		case OP_REMOVE:
+			status = decode_remove(cp, nfserr, &cp->ops[cp->nops].u.remove);
+			break;
+		case OP_RENAME:
+			status = decode_rename(cp, nfserr, &cp->ops[cp->nops].u.rename);
+			break;
+		case OP_RENEW:
+			status = 0;
+			break;
+		case OP_SAVEFH:
+			status = 0;
+			break;
+		case OP_SETATTR:
+			status = decode_setattr(cp);
+			break;
+		case OP_SETCLIENTID:
+			status = decode_setclientid(cp, nfserr);
+			break;
+		case OP_SETCLIENTID_CONFIRM:
+			status = 0;
+			break;
+		case OP_WRITE:
+			status = decode_write(cp, nfserr, &cp->ops[cp->nops].u.write);
+			break;
+		default:
+			BUG();
+			return -EIO;
+		}
+		if (status)
+			goto xdr_error;
+	}
+
+	DECODE_TAIL;
+}
+/*
+ * END OF "GENERIC" DECODE ROUTINES.
+ */
+
+/*
+ * Decode void reply: nothing is parsed and success is always returned.
+ */
+static int
+nfs4_xdr_dec_void(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	return 0;
+}
+
+/*
+ * Decode COMPOUND response: run the generic decoder over the reply and
+ * return its error, else the negated errno for the top-level status.
+ */
+static int
+nfs4_xdr_dec_compound(struct rpc_rqst *rqstp, u32 *p, struct nfs4_compound *cp)
+{
+	u8 *reply_base = (u8 *) rqstp->rq_rvec->iov_base;
+	int err;
+
+	cp->p = p;
+	cp->end = (u32 *) (reply_base + rqstp->rq_rvec->iov_len);
+
+	err = decode_compound(cp, rqstp);
+	if (err)
+		return err;
+
+	if (!cp->toplevel_status)
+		return 0;
+	return -nfs_stat_to_errno(cp->toplevel_status);
+}
+
+u32 *
+nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+{	/* note: 'plus' is not referenced anywhere in this function */
+	u32 len;
+	/* zero first word: no entry here; the word after it is the EOF flag */
+	if (!*p++) {
+		if (!*p)
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;
+		return ERR_PTR(-EBADCOOKIE);
+	}
+	/* 64-bit cookie, name length, then the name (XDR_QUADLEN words) */
+	entry->prev_cookie = entry->cookie;
+	p = xdr_decode_hyper(p, &entry->cookie);
+	entry->len = ntohl(*p++);
+	entry->name = (const char *) p;
+	p += XDR_QUADLEN(entry->len);
+
+	if (entry->cookie > COOKIE_MAX)
+		entry->cookie = COOKIE_MAX;
+	
+	/*
+	 * In case the server doesn't return an inode number,
+	 * we fake one here.  (We don't use inode number 0,
+	 * since glibc seems to choke on it...)
+	 */
+	entry->ino = 1;
+	/* skip the bitmap words; non-empty attribute data supplies the ino */
+	len = ntohl(*p++);             /* bitmap length */
+	p += len;
+	len = ntohl(*p++);             /* attribute buffer length */
+	if (len)	/* NOTE(review): one hyper is consumed whatever len says */
+		p = xdr_decode_hyper(p, &entry->ino);
+	/* eof when no further entry follows (p[0] == 0) and p[1] is nonzero */
+	entry->eof = !p[0] && p[1];
+	return p;
+}
+
+#ifndef MAX
+# define MAX(a, b)	(((a) > (b))? (a) : (b))	/* NB: the larger argument is evaluated twice */
+#endif
+
+#define PROC(proc, argtype, restype)				\
+    { "nfs4_" #proc,						\
+      (kxdrproc_t) nfs4_xdr_##argtype,				\
+      (kxdrproc_t) nfs4_xdr_##restype,				\
+      MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,	\
+      0							\
+    }
+
+static struct rpc_procinfo	nfs4_procedures[] = {
+  PROC(null,		enc_void,	dec_void),
+  PROC(compound,	enc_compound,	dec_compound)
+};
+
+struct rpc_version		nfs_version4 = {
+	.number			= 4,
+	.nrprocs		= sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]),
+	.procs			= nfs4_procedures
+};
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 76ab4ecc3ea8..3a23ac81e80f 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -472,6 +472,25 @@ extern void * nfs_root_data(void);
 
 #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 
+#ifdef CONFIG_NFS_V4
+struct nfs4_client {
+        atomic_t                cl_count;       /* refcount */
+        u64                     cl_clientid;    /* constant */
+	 nfs4_verifier           cl_confirm;     
+
+        /*
+         * Starts a list of lockowners, linked through lo_list.
+	 */
+        struct list_head        cl_lockowners;  /* protected by state_spinlock */
+};
+
+/* nfs4proc.c */
+extern int nfs4_proc_renew(struct nfs_server *server);
+
+/* nfs4renewd.c */
+extern int nfs4_init_renewd(struct nfs_server *server);
+#endif /* CONFIG_NFS_V4 */
+
 #ifdef CONFIG_NFS_V4
 
 extern struct nfs4_client *nfs4_get_client(void);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e542fe6982c5..4bb5125056e7 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -325,6 +325,219 @@ struct nfs3_readdirres {
 	int			plus;
 };
 
+#ifdef CONFIG_NFS_V4
+
+typedef u64 clientid4;
+
+struct nfs4_change_info {
+	u32				atomic;
+	u64				before;
+	u64				after;
+};
+
+struct nfs4_access {
+	u32				ac_req_access;     /* request */
+	u32 *				ac_resp_supported; /* response */
+	u32 *				ac_resp_access;    /* response */
+};
+
+struct nfs4_close {
+	char *				cl_stateid;        /* request */
+	u32				cl_seqid;          /* request */
+};
+
+struct nfs4_commit {
+	u64				co_start;          /* request */
+	u32				co_len;            /* request */
+	struct nfs_writeverf *		co_verifier;       /* response */
+};
+
+struct nfs4_create {
+	u32				cr_ftype;          /* request */
+	union {                                            /* request */
+		struct {
+			u32		textlen;
+			const char *	text;
+		} symlink;   /* NF4LNK */
+		struct {
+			u32		specdata1;
+			u32		specdata2;
+		} device;    /* NF4BLK, NF4CHR */
+	} u;
+	u32				cr_namelen;        /* request */
+	const char *			cr_name;           /* request */
+	struct iattr *			cr_attrs;          /* request */
+	struct nfs4_change_info	*	cr_cinfo;          /* response */
+};
+#define cr_textlen			u.symlink.textlen
+#define cr_text				u.symlink.text
+#define cr_specdata1			u.device.specdata1
+#define cr_specdata2			u.device.specdata2
+
+struct nfs4_getattr {
+        u32 *				gt_bmval;          /* request */
+        struct nfs_fattr *		gt_attrs;          /* response */
+	struct nfs_fsstat *		gt_fsstat;         /* response */
+	struct nfs_fsinfo *		gt_fsinfo;         /* response */
+	struct nfs_pathconf *		gt_pathconf;       /* response */
+	u32 *				gt_bmres;	   /* response */
+};
+
+struct nfs4_getfh {
+	struct nfs_fh *			gf_fhandle;       /* response */
+};
+
+struct nfs4_link {
+	u32				ln_namelen;       /* request */
+	const char *			ln_name;          /* request */
+	struct nfs4_change_info *	ln_cinfo;         /* response */
+};
+
+struct nfs4_lookup {
+	struct qstr *			lo_name;          /* request */
+};
+
+struct nfs4_open {
+	u32				op_share_access;  /* request */
+	u32				op_opentype;      /* request */
+	u32				op_createmode;    /* request */
+	union {                                           /* request */
+		struct iattr *		attrs;    /* UNCHECKED, GUARDED */
+		nfs4_verifier		verifier; /* EXCLUSIVE */
+	} u;
+	struct qstr *			op_name;          /* request */
+	char *				op_stateid;       /* response */
+	struct nfs4_change_info	*	op_cinfo;         /* response */
+	u32 *				op_rflags;        /* response */
+};
+#define op_attrs     u.attrs
+#define op_verifier  u.verifier
+
+struct nfs4_open_confirm {
+	char *				oc_stateid;       /* request */
+};
+
+struct nfs4_putfh {
+	struct nfs_fh *			pf_fhandle;       /* request */
+};
+
+struct nfs4_read {
+	u64				rd_offset;        /* request */
+	u32				rd_length;        /* request */
+	u32				*rd_eof;          /* response */
+	u32				*rd_bytes_read;   /* response */
+	struct page **			rd_pages;   /* zero-copy data */
+	unsigned int			rd_pgbase;  /* zero-copy data */
+};
+
+struct nfs4_readdir {
+	u64				rd_cookie;        /* request */
+	nfs4_verifier			rd_req_verifier;  /* request */
+	u32				rd_count;         /* request */
+	u32				rd_bmval[2];      /* request */	
+	nfs4_verifier			rd_resp_verifier; /* response */
+	struct page **			rd_pages;   /* zero-copy data */
+	unsigned int			rd_pgbase;  /* zero-copy data */
+};
+
+struct nfs4_readlink {
+	u32				rl_count;   /* zero-copy data */
+	struct page **			rl_pages;   /* zero-copy data */
+};
+
+struct nfs4_remove {
+	u32				rm_namelen;       /* request */
+	const char *			rm_name;          /* request */
+	struct nfs4_change_info *	rm_cinfo;         /* response */
+};
+
+struct nfs4_rename {
+	u32				rn_oldnamelen;    /* request */
+	const char *			rn_oldname;       /* request */
+	u32				rn_newnamelen;    /* request */
+	const char *			rn_newname;       /* request */
+	struct nfs4_change_info	*	rn_src_cinfo;     /* response */
+	struct nfs4_change_info *	rn_dst_cinfo;     /* response */
+};
+
+struct nfs4_setattr {
+	char *				st_stateid;       /* request */
+	struct iattr *			st_iap;           /* request */
+};
+
+struct nfs4_setclientid {
+	nfs4_verifier			sc_verifier;      /* request */
+	char *				sc_name;	  /* request */
+	u32				sc_prog;          /* request */
+	char				sc_netid[4];	  /* request */
+	char				sc_uaddr[24];     /* request */
+	u32				sc_cb_ident;      /* request */
+};
+
+struct nfs4_write {
+	u64				wr_offset;        /* request */
+	u32				wr_stable_how;    /* request */
+	u32				wr_len;           /* request */
+	u32 *				wr_bytes_written; /* response */
+	struct nfs_writeverf *		wr_verf;          /* response */
+	struct page **			wr_pages;   /* zero-copy data */
+	unsigned int			wr_pgbase;  /* zero-copy data */
+};
+
+struct nfs4_op {
+	u32				opnum;	/* OP_* code; the reply must echo it */
+	u32				nfserr;	/* per-op status taken from the reply */
+	union {					/* per-op data, selected by opnum */
+		struct nfs4_access	access;
+		struct nfs4_close	close;
+		struct nfs4_commit	commit;
+		struct nfs4_create	create;
+		struct nfs4_getattr	getattr;
+		struct nfs4_getfh	getfh;
+		struct nfs4_link	link;
+		struct nfs4_lookup	lookup;
+		struct nfs4_open	open;
+		struct nfs4_open_confirm open_confirm;
+		struct nfs4_putfh	putfh;
+		struct nfs4_read	read;
+		struct nfs4_readdir	readdir;
+		struct nfs4_readlink	readlink;
+		struct nfs4_remove	remove;
+		struct nfs4_rename	rename;
+		struct nfs4_setattr	setattr;
+		struct nfs4_setclientid	setclientid;
+		struct nfs4_write	write;
+	} u;
+};
+
+struct nfs4_compound {
+	unsigned int		flags;   /* defined below */
+	struct nfs_server *	server;
+
+	/* RENEW information */
+	int			renew_index;
+	unsigned long		timestamp;
+
+	/* scratch variables for XDR encode/decode */
+	int			nops;	/* decode: index of the op being parsed */
+	u32 *			p;	/* decode: initialised to the reply data pointer */
+	u32 *			end;	/* decode: end of the receive buffer */
+
+	/* the individual COMPOUND operations */
+	struct nfs4_op		*ops;	/* indexed by nops while decoding */
+
+	/* request */
+	int			req_nops;
+	u32			taglen;
+	char *			tag;
+	
+	/* response */
+	int			resp_nops;	/* from the reply; rejected if > req_nops */
+	int			toplevel_status;	/* first reply word; last op's status must match */
+};
+
+#endif /* CONFIG_NFS_V4 */
+
 struct nfs_read_data {
 	struct rpc_task		task;
 	struct inode		*inode;
@@ -338,7 +551,12 @@ struct nfs_read_data {
 			struct nfs_readres  res;
 		} v3;   /* also v2 */
 #ifdef CONFIG_NFS_V4
-		/* NFSv4 data will come here... */
+		struct {
+			struct nfs4_compound  compound;
+			struct nfs4_op        ops[3];
+			u32                   res_count;
+			u32                   res_eof;
+		} v4;
 #endif
 	} u;
 };
@@ -353,11 +571,17 @@ struct nfs_write_data {
 	struct page		*pagevec[NFS_WRITE_MAXIOV];
 	union {
 		struct {
-			struct nfs_writeargs args;
-			struct nfs_writeres  res;
+			struct nfs_writeargs	args;		/* argument struct */
+			struct nfs_writeres	res;		/* result struct */
 		} v3;
 #ifdef CONFIG_NFS_V4
-		/* NFSv4 data to come here... */
+		struct {
+			struct nfs4_compound  compound;
+			struct nfs4_op        ops[3];
+			u32                   arg_count;
+			u32                   arg_stable;
+			u32                   res_count;
+		} v4;
 #endif
 	} u;
 };
@@ -430,8 +654,10 @@ struct nfs_rpc_ops {
  */
 extern struct nfs_rpc_ops	nfs_v2_clientops;
 extern struct nfs_rpc_ops	nfs_v3_clientops;
+extern struct nfs_rpc_ops	nfs_v4_clientops;
 extern struct rpc_version	nfs_version2;
 extern struct rpc_version	nfs_version3;
+extern struct rpc_version	nfs_version4;
 extern struct rpc_program	nfs_program;
 extern struct rpc_stat		nfs_rpcstat;
 
-- 
cgit v1.2.3


From caa71871e47696c549c5a6e33666b956ca4f7751 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@fys.uio.no>
Date: Tue, 15 Oct 2002 05:30:58 -0700
Subject: [PATCH] A basic NFSv4 client for 2.5.x

This patch defines a new switch in fs/Config.in -
  CONFIG_NFS_V4:  enables nfsv4 client
---
 fs/Config.help | 7 +++++++
 fs/Config.in   | 1 +
 2 files changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/Config.help b/fs/Config.help
index 44622847c4df..d8492860bf9c 100644
--- a/fs/Config.help
+++ b/fs/Config.help
@@ -535,6 +535,13 @@ CONFIG_NFS_DIRECTIO
   causes open() to return EINVAL if a file residing in NFS is
   opened with the O_DIRECT flag.
 
+CONFIG_NFS_V4
+  Say Y here if you want your NFS client to be able to speak the newer
+  version 4 of the NFS protocol.  This feature is experimental, and
+  should only be used if you are interested in helping to test NFSv4.
+
+  If unsure, say N.
+
 CONFIG_ROOT_NFS
   If you want your Linux box to mount its whole root file system (the
   one containing the directory /) from some other computer over the
diff --git a/fs/Config.in b/fs/Config.in
index e6eb844338ec..b2c37c7419b9 100644
--- a/fs/Config.in
+++ b/fs/Config.in
@@ -117,6 +117,7 @@ if [ "$CONFIG_NET" = "y" ]; then
    dep_tristate 'InterMezzo file system support (replicating fs) (EXPERIMENTAL)' CONFIG_INTERMEZZO_FS $CONFIG_INET $CONFIG_EXPERIMENTAL
    dep_tristate 'NFS file system support' CONFIG_NFS_FS $CONFIG_INET
    dep_mbool '  Provide NFSv3 client support' CONFIG_NFS_V3 $CONFIG_NFS_FS
+   dep_mbool '  Provide NFSv4 client support (EXPERIMENTAL)' CONFIG_NFS_V4 $CONFIG_NFS_FS $CONFIG_EXPERIMENTAL
    dep_bool '  Root file system on NFS' CONFIG_ROOT_NFS $CONFIG_NFS_FS $CONFIG_IP_PNP
 
    dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET
-- 
cgit v1.2.3


From 67bb51b9c7ccf1dbda676d3fb60380d5ffea2dec Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 15 Oct 2002 05:47:34 -0700
Subject: [PATCH] AFS filesystem 2/2

Here's a patch to add an Andrew File System (AFS) driver to the kernel.
Currently it only provides read-only, uncached, non-automounted and
unsecured support.
---
 Documentation/filesystems/afs.txt | 155 +++++++
 fs/Config.help                    |   8 +
 fs/Config.in                      |  10 +
 fs/Makefile                       |   1 +
 fs/afs/Makefile                   |  36 ++
 fs/afs/cache-layout.h             | 224 +++++++++++
 fs/afs/callback.c                 | 168 ++++++++
 fs/afs/cell.c                     | 452 +++++++++++++++++++++
 fs/afs/cell.h                     |  63 +++
 fs/afs/cmservice.c                | 639 +++++++++++++++++++++++++++++
 fs/afs/cmservice.h                |  27 ++
 fs/afs/dir.c                      | 642 +++++++++++++++++++++++++++++
 fs/afs/errors.h                   |  34 ++
 fs/afs/file.c                     | 143 +++++++
 fs/afs/fsclient.c                 | 816 +++++++++++++++++++++++++++++++++++++
 fs/afs/fsclient.h                 |  53 +++
 fs/afs/inode.c                    | 418 +++++++++++++++++++
 fs/afs/internal.h                 | 127 ++++++
 fs/afs/kafsasyncd.c               | 260 ++++++++++++
 fs/afs/kafsasyncd.h               |  49 +++
 fs/afs/kafstimod.c                | 211 ++++++++++
 fs/afs/kafstimod.h                |  45 +++
 fs/afs/main.c                     | 193 +++++++++
 fs/afs/misc.c                     |  39 ++
 fs/afs/mntpt.c                    | 112 ++++++
 fs/afs/mount.h                    |  23 ++
 fs/afs/proc.c                     | 739 ++++++++++++++++++++++++++++++++++
 fs/afs/server.c                   | 489 ++++++++++++++++++++++
 fs/afs/server.h                   |  97 +++++
 fs/afs/super.c                    | 595 +++++++++++++++++++++++++++
 fs/afs/super.h                    |  43 ++
 fs/afs/transport.h                |  21 +
 fs/afs/types.h                    | 152 +++++++
 fs/afs/vlclient.c                 | 662 ++++++++++++++++++++++++++++++
 fs/afs/vlclient.h                 |  95 +++++
 fs/afs/vlocation.c                | 824 ++++++++++++++++++++++++++++++++++++++
 fs/afs/vnode.c                    | 316 +++++++++++++++
 fs/afs/vnode.h                    |  88 ++++
 fs/afs/volume.c                   | 430 ++++++++++++++++++++
 fs/afs/volume.h                   |  92 +++++
 40 files changed, 9591 insertions(+)
 create mode 100644 Documentation/filesystems/afs.txt
 create mode 100644 fs/afs/Makefile
 create mode 100644 fs/afs/cache-layout.h
 create mode 100644 fs/afs/callback.c
 create mode 100644 fs/afs/cell.c
 create mode 100644 fs/afs/cell.h
 create mode 100644 fs/afs/cmservice.c
 create mode 100644 fs/afs/cmservice.h
 create mode 100644 fs/afs/dir.c
 create mode 100644 fs/afs/errors.h
 create mode 100644 fs/afs/file.c
 create mode 100644 fs/afs/fsclient.c
 create mode 100644 fs/afs/fsclient.h
 create mode 100644 fs/afs/inode.c
 create mode 100644 fs/afs/internal.h
 create mode 100644 fs/afs/kafsasyncd.c
 create mode 100644 fs/afs/kafsasyncd.h
 create mode 100644 fs/afs/kafstimod.c
 create mode 100644 fs/afs/kafstimod.h
 create mode 100644 fs/afs/main.c
 create mode 100644 fs/afs/misc.c
 create mode 100644 fs/afs/mntpt.c
 create mode 100644 fs/afs/mount.h
 create mode 100644 fs/afs/proc.c
 create mode 100644 fs/afs/server.c
 create mode 100644 fs/afs/server.h
 create mode 100644 fs/afs/super.c
 create mode 100644 fs/afs/super.h
 create mode 100644 fs/afs/transport.h
 create mode 100644 fs/afs/types.h
 create mode 100644 fs/afs/vlclient.c
 create mode 100644 fs/afs/vlclient.h
 create mode 100644 fs/afs/vlocation.c
 create mode 100644 fs/afs/vnode.c
 create mode 100644 fs/afs/vnode.h
 create mode 100644 fs/afs/volume.c
 create mode 100644 fs/afs/volume.h

(limited to 'fs')

diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.txt
new file mode 100644
index 000000000000..2f4237dfb8c7
--- /dev/null
+++ b/Documentation/filesystems/afs.txt
@@ -0,0 +1,155 @@
+			     kAFS: AFS FILESYSTEM
+			     ====================
+
+ABOUT
+=====
+
+This filesystem provides a fairly simple AFS filesystem driver. It is under
+development and only provides very basic facilities. It does not yet support
+the following AFS features:
+
+	(*) Write support.
+	(*) Communications security.
+	(*) Local caching.
+	(*) pioctl() system call.
+	(*) Automatic mounting of embedded mountpoints.
+
+
+USAGE
+=====
+
+When inserting the driver modules the root cell must be specified along with a
+list of volume location server IP addresses:
+
+	insmod rxrpc.o
+	insmod kafs.o rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
+
+The first module is a driver for the RxRPC remote operation protocol, and the
+second is the actual filesystem driver for the AFS filesystem.
+
+Once the module has been loaded, more cells can be added by the following
+procedure:
+
+	echo add grand.central.org 18.7.14.88:128.2.191.224 >/proc/fs/afs/cells
+
+Where the parameters to the "add" command are the name of a cell and a list of
+volume location servers within that cell.
+
+Filesystems can be mounted anywhere by commands similar to the following:
+
+	mount -t afs "%cambridge.redhat.com:root.afs." /afs
+	mount -t afs "#cambridge.redhat.com:root.cell." /afs/cambridge
+	mount -t afs "#root.afs." /afs
+	mount -t afs "#root.cell." /afs/cambridge
+
+  NB: When using this on Linux 2.4, the mount command has to be different,
+      since the filesystem doesn't have access to the device name argument:
+
+	mount -t afs none /afs -ovol="#root.afs."
+
+Where the initial character is either a hash or a percent symbol depending on
+whether you definitely want a R/W volume (percent) or whether you'd prefer a R/O
+volume, but are willing to use a R/W volume instead (hash).
+
+The name of the volume can be suffixed with ".backup" or ".readonly" to
+specify connection to only volumes of those types.
+
+The name of the cell is optional, and if not given during a mount, then the
+named volume will be looked up in the cell specified during insmod.
+
+Additional cells can be added through /proc (see later section).
+
+
+MOUNTPOINTS
+===========
+
+AFS has a concept of mountpoints. These are specially formatted symbolic links
+(of the same form as the "device name" passed to mount). kAFS presents these
+to the user as directories that have special properties:
+
+  (*) They cannot be listed. Running a program like "ls" on them will incur an
+      EREMOTE error (Object is remote).
+
+  (*) Other objects can't be looked up inside of them. This also incurs an
+      EREMOTE error.
+
+  (*) They can be queried with the readlink() system call, which will return
+      the name of the mountpoint to which they point. The "readlink" program
+      will also work.
+
+  (*) They can be mounted on (which symbolic links can't).
+
+
+PROC FILESYSTEM
+===============
+
+The rxrpc module creates a number of files in various places in the /proc
+filesystem:
+
+  (*) Firstly, some information files are made available in a directory called
+      "/proc/net/rxrpc/". These list the extant transport endpoint, peer,
+      connection and call records.
+
+  (*) Secondly, some control files are made available in a directory called
+      "/proc/sys/rxrpc/". Currently, all these files can be used for is to
+      turn on various levels of tracing.
+
+The AFS module creates a "/proc/fs/afs/" directory and populates it:
+
+  (*) A "cells" file that lists cells currently known to the afs module.
+
+  (*) A directory per cell that contains files that list volume location
+      servers, volumes, and active servers known within that cell.
+
+
+THE CELL DATABASE
+=================
+
+The filesystem maintains an internal database of all the cells it knows and
+the IP addresses of the volume location servers for those cells. The cell to
+which the computer belongs is added to the database when insmod is performed
+by the "rootcell=" argument.
+
+Further cells can be added by commands similar to the following:
+
+	echo add CELLNAME VLADDR[:VLADDR][:VLADDR]... >/proc/fs/afs/cells
+	echo add grand.central.org 18.7.14.88:128.2.191.224 >/proc/fs/afs/cells
+
+No other cell database operations are available at this time.
+
+
+EXAMPLES
+========
+
+Here's what I use to test this. Some of the names and IP addresses are local
+to my internal DNS. My "root.afs" partition has a mount point within it for
+some public volumes.
+
+insmod -S /tmp/rxrpc.o 
+insmod -S /tmp/kafs.o rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
+
+mount -t afs \%root.afs. /afs
+mount -t afs \%cambridge.redhat.com:root.cell. /afs/cambridge.redhat.com/
+
+echo add grand.central.org 18.7.14.88:128.2.191.224 > /proc/fs/afs/cells 
+mount -t afs "#grand.central.org:root.cell." /afs/grand.central.org/
+mount -t afs "#grand.central.org:root.archive." /afs/grand.central.org/archive
+mount -t afs "#grand.central.org:root.contrib." /afs/grand.central.org/contrib
+mount -t afs "#grand.central.org:root.doc." /afs/grand.central.org/doc
+mount -t afs "#grand.central.org:root.project." /afs/grand.central.org/project
+mount -t afs "#grand.central.org:root.service." /afs/grand.central.org/service
+mount -t afs "#grand.central.org:root.software." /afs/grand.central.org/software
+mount -t afs "#grand.central.org:root.user." /afs/grand.central.org/user
+
+umount /afs/grand.central.org/user
+umount /afs/grand.central.org/software
+umount /afs/grand.central.org/service
+umount /afs/grand.central.org/project
+umount /afs/grand.central.org/doc
+umount /afs/grand.central.org/contrib
+umount /afs/grand.central.org/archive
+umount /afs/grand.central.org
+umount /afs/cambridge.redhat.com
+umount /afs
+rmmod kafs
+rmmod rxrpc
diff --git a/fs/Config.help b/fs/Config.help
index d8492860bf9c..76ffd7584add 100644
--- a/fs/Config.help
+++ b/fs/Config.help
@@ -1144,3 +1144,11 @@ CONFIG_XFS_RT
 
   If unsure, say N.
 
+CONFIG_AFS_FS
+  If you say Y here, you will get an experimental Andrew File System
+  driver. It currently only supports unsecured read-only AFS access.
+
+  See Documentation/filesystems/afs.txt for more information.
+
+  If unsure, say N.
+
diff --git a/fs/Config.in b/fs/Config.in
index b2c37c7419b9..0464a17a8dbd 100644
--- a/fs/Config.in
+++ b/fs/Config.in
@@ -158,6 +158,16 @@ if [ "$CONFIG_NET" = "y" ]; then
       # for fs/nls/Config.in
       define_bool CONFIG_NCPFS_NLS n
    fi
+
+   dep_tristate 'Andrew File System support (AFS) (Experimental)' CONFIG_AFS_FS $CONFIG_INET $CONFIG_EXPERIMENTAL
+   if [ "$CONFIG_AFS_FS" = "y" ]; then
+      define_tristate CONFIG_RXRPC y
+   else
+      if [ "$CONFIG_AFS_FS" = "m" ]; then
+	 define_tristate CONFIG_RXRPC m
+      fi
+   fi
+
    endmenu
 
 else
diff --git a/fs/Makefile b/fs/Makefile
index a4320cf860ac..c28d57ab55a9 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -86,5 +86,6 @@ obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
 obj-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs/
 obj-$(CONFIG_JFS_FS)		+= jfs/
 obj-$(CONFIG_XFS_FS)		+= xfs/
+obj-$(CONFIG_AFS_FS)		+= afs/
 
 include $(TOPDIR)/Rules.make
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
new file mode 100644
index 000000000000..753cf8c5b4eb
--- /dev/null
+++ b/fs/afs/Makefile
@@ -0,0 +1,36 @@
+#
+# Makefile for Red Hat Linux AFS client.
+#
+
+kafs-objs := \
+	callback.o \
+	cell.o \
+	cmservice.o \
+	dir.o \
+	file.o \
+	fsclient.o \
+	inode.o \
+	kafsasyncd.o \
+	kafstimod.o \
+	main.o \
+	misc.o \
+	mntpt.o \
+	proc.o \
+	server.o \
+	super.o \
+	vlclient.o \
+	vlocation.o \
+	vnode.o \
+	volume.o
+
+#	cache.o
+# NOTE(review): obj-m builds a module even if CONFIG_AFS_FS=y; confirm intent
+obj-m  := kafs.o
+
+# superfluous for 2.5, but needed for 2.4..
+ifeq "$(VERSION).$(PATCHLEVEL)" "2.4"
+kafs.o: $(kafs-objs)
+	$(LD) -r -o kafs.o $(kafs-objs)
+endif
+
+include $(TOPDIR)/Rules.make
diff --git a/fs/afs/cache-layout.h b/fs/afs/cache-layout.h
new file mode 100644
index 000000000000..e71afd719a3f
--- /dev/null
+++ b/fs/afs/cache-layout.h
@@ -0,0 +1,224 @@
+/* cache-layout.h: AFS cache layout
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * The cache is stored on a block device and is laid out as:
+ *
+ *  0	+------------------------------------------------
+ *	|
+ *	|  SuperBlock
+ *	|
+ *  1	+------------------------------------------------
+ *	|
+ *	|  file-meta-data File: Data block #0
+ *	|  - file-meta-data file (volix #0 file #0) : Meta-data block
+ *	|    - contains direct pointers to first 64 file data blocks
+ *	|  - Cached cell catalogue file (volix #0 file #1) file: Meta-data block
+ *	|  - Cached volume location catalogue file (volix #0 file #2): Meta-data block
+ *	|  - Vnode catalogue hash bucket #n file: Meta-data block
+ *	|
+ *  2	+------------------------------------------------
+ *	|
+ *	|  Bitmap Block Allocation Bitmap
+ *	|  - 1 bit per block in the bitmap block
+ *      |  - bit 0 of dword 0 refers to the bitmap block 0
+ *	|    - set if the bitmap block is full
+ *      |  - 32768 bits per block, requiring 4 blocks for a 16Tb cache
+ *	|  - bitmap bitmap blocks are cleared initially
+ *	|  - not present if <4 bitmap blocks
+ *	|
+ *	+------------------------------------------------
+ *	|
+ *	|  File Block Allocation Bitmap
+ *	|  - 1 bit per block in the cache
+ *      |  - bit 0 of dword 0 refers to the first block of the data cache
+ *	|    - set if block is allocated
+ *      |  - 32768 bits per block, requiring 131072 blocks for a 16Tb cache
+ *	|  - bitmap blocks are cleared lazily (sb->bix_bitmap_unready)
+ *	|
+ *	+------------------------------------------------
+ *	|
+ *	|  Data Cache
+ *	|
+ *  End	+------------------------------------------------
+ *
+ * Blocks are indexed by an unsigned 32-bit word, meaning that the cache can hold up to 2^32 pages,
+ * or 16Tb in total.
+ *
+ * Credentials will be cached in memory, since they are subject to change without notice, and are
+ * difficult to derive manually, being constructed from the following information:
+ * - per vnode user ID and mode mask
+ * - parent directory ACL
+ * - directory ACL (dirs only)
+ * - group lists from ptserver
+ */
+
+#ifndef _LINUX_AFS_CACHE_LAYOUT_H
+#define _LINUX_AFS_CACHE_LAYOUT_H
+
+#include "types.h"
+
+typedef u32 afsc_blockix_t;
+typedef u32 afsc_cellix_t;
+
+/* Cached volume index
+ * - afsc_volix_t/4 is the index into the volume cache
+ * - afsc_volix_t%4 is 0 for R/W, 1 for R/O and 2 for Bak (3 is not used)
+ * - afsc_volix_t==0-3 refers to a "virtual" volume that stores meta-data about the cache
+ */
+typedef struct {
+	u32 index;
+} afsc_volix_t;
+
+#define AFSC_VNCAT_HASH_NBUCKETS	128
+
+/* special meta file IDs (all cell 0 vol 0) */
+enum afsc_meta_fids {
+	AFSC_META_FID_METADATA		= 0,
+	AFSC_META_FID_CELL_CATALOGUE	= 1,
+	AFSC_META_FID_VLDB_CATALOGUE	= 2,
+	AFSC_META_FID_VNODE_CATALOGUE0	= 3,
+	AFSC_META_FID__COUNT		= AFSC_VNCAT_HASH_NBUCKETS + 3
+};
+
+/*****************************************************************************/
+/*
+ * cache superblock block layout
+ * - the blockdev is prepared for initialisation by 'echo "kafsuninit" >/dev/hdaXX' before mounting
+ * - when initialised, the magic number is changed to "kafs-cache"
+ */
+struct afsc_super_block
+{
+	char			magic[10];	/* magic number */
+#define AFSC_SUPER_MAGIC "kafs-cache"
+#define AFSC_SUPER_MAGIC_NEEDS_INIT "kafsuninit"
+#define AFSC_SUPER_MAGIC_SIZE 10
+
+	unsigned short		endian;		/* 0x1234 stored CPU-normal order */
+#define AFSC_SUPER_ENDIAN 0x1234
+
+	unsigned		version;	/* format version */
+#define AFSC_SUPER_VERSION 1
+
+	/* layout */
+	unsigned		bsize;			/* cache block size */
+	afsc_blockix_t		bix_bitmap_fullmap;	/* block ix of bitmap full bitmap */
+	afsc_blockix_t		bix_bitmap;		/* block ix of alloc bitmap */
+	afsc_blockix_t		bix_bitmap_unready;	/* block ix of unready area of bitmap */
+	afsc_blockix_t		bix_cache;		/* block ix of data cache */
+	afsc_blockix_t		bix_end;		/* block ix of end of cache */
+};
+
+/*****************************************************************************/
+/*
+ * vnode (inode) metadata cache record
+ * - padded out to 512 bytes and stored eight to a page
+ * - only the data version is necessary
+ *   - disconnected operation is not supported
+ *   - afs_iget() contacts the server to get the meta-data _anyway_ when an inode is first brought
+ *     into memory
+ * - at least 64 direct block pointers will be available (a directory is max 256Kb)
+ * - any block pointer which is 0 indicates an uncached page
+ */
+struct afsc_vnode_meta
+{
+	/* file ID */
+	afsc_volix_t		volume_ix;	/* volume catalogue index */
+	unsigned		vnode;		/* vnode number */
+	unsigned		unique;		/* FID unique */
+	unsigned		size;		/* size of file */
+	time_t			mtime;		/* last modification time */
+
+	/* file status */
+	afs_dataversion_t	version;	/* current data version */
+
+	/* file contents */
+	afsc_blockix_t		dbl_indirect;	/* double indirect block index */
+	afsc_blockix_t		indirect;	/* single indirect block 0 index */
+	afsc_blockix_t		direct[0];	/* direct block index (#AFSC_VNODE_META_DIRECT) */
+};
+
+#define AFSC_VNODE_META_RECSIZE	512	/* record size */
+
+#define AFSC_VNODE_META_DIRECT	\
+	((AFSC_VNODE_META_RECSIZE-sizeof(struct afsc_vnode_meta))/sizeof(afsc_blockix_t))
+
+#define AFSC_VNODE_META_PER_PAGE	(PAGE_SIZE / AFSC_VNODE_META_RECSIZE)
+
+/*****************************************************************************/
+/*
+ * entry in the cached cell catalogue
+ */
+struct afsc_cell_record
+{
+	char			name[64];	/* cell name (padded with NULs) */
+	struct in_addr		servers[16];	/* cached cell servers */
+};
+
+/*****************************************************************************/
+/*
+ * entry in the cached volume location catalogue
+ * - indexed by afsc_volix_t/4
+ */
+struct afsc_vldb_record
+{
+	char			name[64];	/* volume name (padded with NULs) */
+	afs_volid_t		vid[3];		/* volume IDs for R/W, R/O and Bak volumes */
+	unsigned char		vidmask;	/* voltype mask for vid[] */
+	unsigned char		_pad[1];
+	unsigned short		nservers;	/* number of entries used in servers[] */
+	struct in_addr		servers[8];	/* fileserver addresses */
+	unsigned char		srvtmask[8];	/* voltype masks for servers[] */
+#define AFSC_VOL_STM_RW	0x01 /* server holds a R/W version of the volume */
+#define AFSC_VOL_STM_RO	0x02 /* server holds a R/O version of the volume */
+#define AFSC_VOL_STM_BAK	0x04 /* server holds a backup version of the volume */
+
+	afsc_cellix_t		cell_ix;	/* cell catalogue index (MAX_UINT if unused) */
+	time_t			ctime;		/* time at which cached */
+};
+
+/*****************************************************************************/
+/*
+ * vnode catalogue entry
+ * - must be 2^x size so that do_generic_file_read doesn't present them split across pages
+ */
+struct afsc_vnode_catalogue
+{
+	afsc_volix_t		volume_ix;	/* volume catalogue index */
+	afs_vnodeid_t		vnode;		/* vnode ID */
+	u32			meta_ix;	/* metadata file index */
+	u32			atime;		/* last time entry accessed */
+} __attribute__((packed));
+
+#define AFSC_VNODE_CATALOGUE_PER_BLOCK ((size_t)(PAGE_SIZE/sizeof(struct afsc_vnode_catalogue)))
+
+/*****************************************************************************/
+/*
+ * vnode data "page directory" block
+ * - first 1024 pages don't map through here
+ * - PAGE_SIZE in size (1024 u32 indices = 4096 bytes, i.e. assumes 4KB pages)
+ */
+struct afsc_indirect_block	/* NOTE(review): a directory of page tables reads like the double-indirect level; confirm naming */
+{
+	afsc_blockix_t		pt_bix[1024];	/* "page table" block indices */
+};
+
+/*****************************************************************************/
+/*
+ * vnode data "page table" block
+ * - PAGE_SIZE in size (1024 u32 indices = 4096 bytes, i.e. assumes 4KB pages)
+ */
+struct afsc_dbl_indirect_block
+{
+	afsc_blockix_t		page_bix[1024];	/* "page" block indices */
+};
+
+
+#endif /* _LINUX_AFS_CACHE_LAYOUT_H */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
new file mode 100644
index 000000000000..8d030bd67aa0
--- /dev/null
+++ b/fs/afs/callback.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
+ *          David Howells <dhowells@redhat.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include "server.h"
+#include "vnode.h"
+#include "internal.h"
+
+/*****************************************************************************/
+/*
+ * allow the fileserver to request callback state (re-)initialisation; always returns 0
+ */
+int SRXAFSCM_InitCallBackState(afs_server_t *server)
+{
+	struct list_head callbacks;	/* private head that takes over the server's promise list */
+
+	_enter("%p",server);
+
+	INIT_LIST_HEAD(&callbacks);
+
+	/* transfer the callback list from the server to a temp holding area */
+	spin_lock(&server->cb_lock);
+
+	list_add(&callbacks,&server->cb_promises);	/* link the temp head in right after the server's head... */
+	list_del_init(&server->cb_promises);		/* ...then unlink the server's head, leaving it empty */
+
+	/* munch our way through the list, grabbing the inode, dropping all the locks and regetting
+	 * them in the right order
+	 */
+	while (!list_empty(&callbacks)) {
+		struct inode *inode;
+		afs_vnode_t *vnode;
+
+		vnode = list_entry(callbacks.next,afs_vnode_t,cb_link);
+		list_del_init(&vnode->cb_link);
+
+		/* try and grab the inode - may fail, in which case the vnode is just left unlinked */
+		inode = igrab(AFS_VNODE_TO_I(vnode));
+		if (inode) {
+			int release = 0;	/* set once vnode->cb_server has been cleared below */
+
+			spin_unlock(&server->cb_lock);
+			spin_lock(&vnode->lock);
+
+			if (vnode->cb_server==server) {
+				vnode->cb_server = NULL;
+				afs_kafstimod_del_timer(&vnode->cb_timeout);
+				spin_lock(&afs_cb_hash_lock);
+				list_del_init(&vnode->cb_hash_link);
+				spin_unlock(&afs_cb_hash_lock);
+				release = 1;
+			}
+
+			spin_unlock(&vnode->lock);
+
+			iput(inode);
+			if (release) afs_put_server(server);
+
+			spin_lock(&server->cb_lock);	/* retake before looking at the temp list again */
+		}
+	}
+
+	spin_unlock(&server->cb_lock);
+
+	_leave(" = 0");
+	return 0;
+} /* end SRXAFSCM_InitCallBackState() */
+
+/*****************************************************************************/
+/*
+ * allow the fileserver to break the callback promises on "count" fids; always returns 0
+ */
+int SRXAFSCM_CallBack(afs_server_t *server, size_t count, afs_callback_t callbacks[])
+{
+	struct list_head *_p;
+
+	_enter("%p,%u,",server,(unsigned)count);	/* count is a size_t; cast to match %u */
+
+	for (; count>0; callbacks++, count--) {
+		struct inode *inode = NULL;	/* stays NULL unless the vnode is found and pinned */
+		afs_vnode_t *vnode = NULL;
+		int valid = 0;			/* set if a promise from this server was broken */
+
+		_debug("- Fid { vl=%08x n=%u u=%u }  CB { v=%u x=%u t=%u }",
+		       callbacks->fid.vid,
+		       callbacks->fid.vnode,
+		       callbacks->fid.unique,
+		       callbacks->version,
+		       callbacks->expiry,
+		       callbacks->type
+		       );
+
+		/* find the inode for this fid */
+		spin_lock(&afs_cb_hash_lock);
+
+		list_for_each(_p,&afs_cb_hash(server,&callbacks->fid)) {
+			vnode = list_entry(_p,afs_vnode_t,cb_hash_link);
+
+			if (memcmp(&vnode->fid,&callbacks->fid,sizeof(afs_fid_t))!=0)
+				continue;
+
+			/* right vnode, but is it same server? */
+			if (vnode->cb_server!=server)
+				break; /* no */
+
+			/* try and nail the inode down */
+			inode = igrab(AFS_VNODE_TO_I(vnode));
+			break;
+		}
+
+		spin_unlock(&afs_cb_hash_lock);
+
+		if (inode) {
+			/* we've found the record for this vnode */
+			spin_lock(&vnode->lock);
+			if (vnode->cb_server==server) {	/* recheck now that the vnode lock is held */
+				/* the callback _is_ on the calling server */
+				vnode->cb_server = NULL;
+				valid = 1;
+
+				afs_kafstimod_del_timer(&vnode->cb_timeout);
+				vnode->flags |= AFS_VNODE_CHANGED;
+
+				spin_lock(&server->cb_lock);
+				list_del_init(&vnode->cb_link);
+				spin_unlock(&server->cb_lock);
+
+				spin_lock(&afs_cb_hash_lock);
+				list_del_init(&vnode->cb_hash_link);
+				spin_unlock(&afs_cb_hash_lock);
+			}
+			spin_unlock(&vnode->lock);
+
+			if (valid) {
+				invalidate_inode_pages(inode->i_mapping);
+				afs_put_server(server);
+			}
+			iput(inode);	/* drop the ref taken by igrab() above */
+		}
+	}
+
+	_leave(" = 0");
+	return 0;
+} /* end SRXAFSCM_CallBack() */
+
+/*****************************************************************************/
+/*
+ * allow the fileserver to see if the cache manager is still alive
+ */
+int SRXAFSCM_Probe(afs_server_t *server)
+{
+	_debug("SRXAFSCM_Probe(%p)\n",server);
+	return 0;
+} /* end SRXAFSCM_Probe() */
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
new file mode 100644
index 000000000000..f7f00a2bec9e
--- /dev/null
+++ b/fs/afs/cell.c
@@ -0,0 +1,452 @@
+/* cell.c: AFS cell and server record management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/connection.h>
+#include "volume.h"
+#include "cell.h"
+#include "server.h"
+#include "transport.h"
+#include "vlclient.h"
+#include "kafstimod.h"
+#include "super.h"
+#include "internal.h"
+
+DECLARE_RWSEM(afs_proc_cells_sem);
+LIST_HEAD(afs_proc_cells);
+
+static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells);
+static rwlock_t afs_cells_lock = RW_LOCK_UNLOCKED;
+static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
+static afs_cell_t *afs_cell_root;
+
+static char *rootcell;
+
+MODULE_PARM(rootcell,"s");
+MODULE_PARM_DESC(rootcell,"root AFS cell name and VL server IP addr list");
+
+/*****************************************************************************/
+/*
+ * create a cell record and add it to the cell lists; returns 0 and sets *_cell on success
+ * - "name" is the name of the cell
+ * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format (modified in place)
+ */
+int afs_cell_create(const char *name, char *vllist, afs_cell_t **_cell)
+{
+	afs_cell_t *cell;
+	char *next;
+	int ret;
+
+	_enter("%s",name);
+
+	if (!name) BUG(); /* TODO: want to look up "this cell" in the cache */
+
+	/* allocate and initialise a cell record - done before taking afs_cells_sem */
+	cell = kmalloc(sizeof(afs_cell_t) + strlen(name) + 1,GFP_KERNEL);
+	if (!cell) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;	/* nothing is held yet, so there is nothing to release */
+	}
+
+	down_write(&afs_cells_sem);	/* every exit path below must release this */
+
+	memset(cell,0,sizeof(afs_cell_t));
+	atomic_set(&cell->usage,0);
+
+	INIT_LIST_HEAD(&cell->link);
+	INIT_LIST_HEAD(&cell->caches);
+
+	rwlock_init(&cell->sv_lock);
+	INIT_LIST_HEAD(&cell->sv_list);
+	INIT_LIST_HEAD(&cell->sv_graveyard);
+	spin_lock_init(&cell->sv_gylock);
+
+	init_rwsem(&cell->vl_sem);
+	INIT_LIST_HEAD(&cell->vl_list);
+	INIT_LIST_HEAD(&cell->vl_graveyard);
+	spin_lock_init(&cell->vl_gylock);
+
+	strcpy(cell->name,name);	/* room for this was included in the kmalloc() size */
+
+	/* fill in the VL server list from the rest of the string */
+	ret = -EINVAL;
+	do {
+		unsigned a, b, c, d;
+
+		next = strchr(vllist,':');
+		if (next) *next++ = 0;	/* terminate this address; next now points at the following one */
+
+		if (sscanf(vllist,"%u.%u.%u.%u",&a,&b,&c,&d)!=4)
+			goto badaddr;
+
+		if (a>255 || b>255 || c>255 || d>255)
+			goto badaddr;
+
+		cell->vl_addrs[cell->vl_naddrs++].s_addr =
+			htonl((a<<24)|(b<<16)|(c<<8)|d);
+
+		if (cell->vl_naddrs>=16)	/* vl_addrs[] only has 16 slots; ignore any further addresses */
+			break;
+
+	} while(vllist=next, vllist);
+
+	/* add a proc dir for this cell */
+	ret = afs_proc_cell_setup(cell);
+	if (ret<0)
+		goto error;
+
+	/* add to the cell lists */
+	write_lock(&afs_cells_lock);
+	list_add_tail(&cell->link,&afs_cells);
+	write_unlock(&afs_cells_lock);
+
+	down_write(&afs_proc_cells_sem);
+	list_add_tail(&cell->proc_link,&afs_proc_cells);
+	up_write(&afs_proc_cells_sem);
+
+	*_cell = cell;
+	up_write(&afs_cells_sem);
+
+	_leave(" = 0 (%p)",cell);
+	return 0;
+
+ badaddr:
+	printk("kAFS: bad VL server IP address: '%s'\n",vllist);
+ error:
+	up_write(&afs_cells_sem);
+	kfree(cell);	/* free the record allocated above (it is on no list yet), not the root cell */
+	return ret;
+} /* end afs_cell_create() */
+
+/*****************************************************************************/
+/*
+ * initialise the cell database from module parameters
+ */
+int afs_cell_init(void)
+{
+	char *cp;
+	int ret;
+
+	_enter("");
+
+	if (!rootcell) {
+		printk("kAFS: no root cell specified\n");
+		return -EINVAL;
+	}
+
+	cp = strchr(rootcell,':');
+	if (!cp) {
+		printk("kAFS: no VL server IP addresses specified\n");
+		return -EINVAL;
+	}
+
+	/* allocate a cell record for the root cell */
+	*cp++ = 0;
+	ret = afs_cell_create(rootcell,cp,&afs_cell_root);
+	if (ret==0)
+		afs_get_cell(afs_cell_root);
+
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_cell_init() */
+
+/*****************************************************************************/
+/*
+ * lookup a cell record
+ */
+int afs_cell_lookup(const char *name, afs_cell_t **_cell)
+{
+	struct list_head *_p;
+	afs_cell_t *cell;
+
+	_enter("\"%s\",",name?name:"*thiscell*");
+
+	cell = afs_cell_root;
+
+	if (name) {
+		/* if the cell was named, look for it in the cell record list */
+		cell = NULL;
+		read_lock(&afs_cells_lock);
+
+		list_for_each(_p,&afs_cells) {
+			cell = list_entry(_p,afs_cell_t,link);
+			if (strcmp(cell->name,name)==0)
+				break;
+			cell = NULL;
+		}
+
+		read_unlock(&afs_cells_lock);
+	}
+
+	if (cell)
+		afs_get_cell(cell);
+
+	*_cell = cell;
+	_leave(" = %d (%p)",cell?0:-ENOENT,cell);
+	return cell ? 0 : -ENOENT;
+
+} /* end afs_cell_lookup() */
+
+/*****************************************************************************/
+/*
+ * try and get a cell record
+ */
+afs_cell_t *afs_get_cell_maybe(afs_cell_t **_cell)
+{
+	afs_cell_t *cell;
+
+	write_lock(&afs_cells_lock);
+
+	cell = *_cell;
+	if (cell && !list_empty(&cell->link))
+		atomic_inc(&cell->usage);
+	else 
+		cell = NULL;
+
+	write_unlock(&afs_cells_lock);
+
+	return cell;
+} /* end afs_get_cell_maybe() */
+
+/*****************************************************************************/
+/*
+ * release a reference on a cell record (the record itself is not freed here)
+ */
+void afs_put_cell(afs_cell_t *cell)
+{
+	_enter("%p{%d,%s}",cell,atomic_read(&cell->usage),cell->name);
+
+	/* sanity check */
+	if (atomic_read(&cell->usage)<=0)
+		BUG();
+
+	/* to prevent a race, the decrement and the dequeue must be effectively atomic */
+	write_lock(&afs_cells_lock);
+
+	if (likely(!atomic_dec_and_test(&cell->usage))) {
+		write_unlock(&afs_cells_lock);
+		_leave("");
+		return;
+	}
+
+	write_unlock(&afs_cells_lock);
+
+	if (!list_empty(&cell->sv_list))	BUG();
+	if (!list_empty(&cell->sv_graveyard))	BUG();
+	if (!list_empty(&cell->vl_list))	BUG();
+	if (!list_empty(&cell->vl_graveyard))	BUG();
+
+	_leave(" [unused]");
+} /* end afs_put_cell() */
+
+/*****************************************************************************/
+/*
+ * destroy a cell record
+ */
+static void afs_cell_destroy(afs_cell_t *cell)
+{
+	_enter("%p{%d,%s}",cell,atomic_read(&cell->usage),cell->name);
+
+	/* to prevent a race, the decrement and the dequeue must be effectively atomic */
+	write_lock(&afs_cells_lock);
+
+	/* sanity check */
+	if (atomic_read(&cell->usage)!=0)
+		BUG();
+
+	list_del_init(&cell->link);
+
+	write_unlock(&afs_cells_lock);
+
+	down_write(&afs_cells_sem);
+
+	afs_proc_cell_remove(cell);
+
+	down_write(&afs_proc_cells_sem);
+	list_del_init(&cell->proc_link);
+	up_write(&afs_proc_cells_sem);
+
+	up_write(&afs_cells_sem);
+
+	if (!list_empty(&cell->sv_list))	BUG();
+	if (!list_empty(&cell->sv_graveyard))	BUG();
+	if (!list_empty(&cell->vl_list))	BUG();
+	if (!list_empty(&cell->vl_graveyard))	BUG();
+
+	/* finish cleaning up the cell */
+	kfree(cell);
+
+	_leave(" [destroyed]");
+} /* end afs_cell_destroy() */
+
+/*****************************************************************************/
+/*
+ * lookup the server record corresponding to an Rx RPC peer
+ */
+int afs_server_find_by_peer(const struct rxrpc_peer *peer, afs_server_t **_server)
+{
+	struct list_head *_pc, *_ps;
+	afs_server_t *server;
+	afs_cell_t *cell;
+
+	_enter("%p{a=%08x},",peer,ntohl(peer->addr.s_addr));
+
+	/* search the cell list */
+	read_lock(&afs_cells_lock);
+
+	list_for_each(_pc,&afs_cells) {
+		cell = list_entry(_pc,afs_cell_t,link);
+
+		_debug("? cell %s",cell->name);
+
+		write_lock(&cell->sv_lock);
+
+		/* check the active list */
+		list_for_each(_ps,&cell->sv_list) {
+			server = list_entry(_ps,afs_server_t,link);
+
+			_debug("?? server %08x",ntohl(server->addr.s_addr));
+
+			if (memcmp(&server->addr,&peer->addr,sizeof(struct in_addr))==0)
+				goto found_server;
+		}
+
+		/* check the inactive list */
+		spin_lock(&cell->sv_gylock);
+		list_for_each(_ps,&cell->sv_graveyard) {
+			server = list_entry(_ps,afs_server_t,link);
+
+			_debug("?? dead server %08x",ntohl(server->addr.s_addr));
+
+			if (memcmp(&server->addr,&peer->addr,sizeof(struct in_addr))==0)
+				goto found_dead_server;
+		}
+		spin_unlock(&cell->sv_gylock);
+
+		write_unlock(&cell->sv_lock);
+	}
+	read_unlock(&afs_cells_lock);
+
+	_leave(" = -ENOENT");
+	return -ENOENT;
+
+	/* we found it in the graveyard - resurrect it */
+ found_dead_server:
+	list_del(&server->link);
+	list_add_tail(&server->link,&cell->sv_list);
+	afs_get_server(server);
+	afs_kafstimod_del_timer(&server->timeout);
+	spin_unlock(&cell->sv_gylock);
+	goto success;
+
+	/* we found it - increment its ref count and return it */
+ found_server:
+	afs_get_server(server);
+
+ success:
+	write_unlock(&cell->sv_lock);
+	read_unlock(&afs_cells_lock);
+
+	*_server = server;
+	_leave(" = 0 (s=%p c=%p)",server,cell);
+	return 0;
+
+} /* end afs_server_find_by_peer() */
+
+/*****************************************************************************/
+/*
+ * purge in-memory cell database on module unload
+ * - the timeout daemon is stopped before calling this
+ */
+void afs_cell_purge(void)
+{
+	afs_vlocation_t *vlocation;
+	afs_cell_t *cell;
+
+	_enter("");
+
+	if (afs_cell_root)
+		afs_put_cell(afs_cell_root);
+
+	while (!list_empty(&afs_cells)) {
+		cell = NULL;
+
+		/* remove the next cell from the front of the list */
+		write_lock(&afs_cells_lock);
+
+		if (!list_empty(&afs_cells)) {
+			cell = list_entry(afs_cells.next,afs_cell_t,link);
+			list_del_init(&cell->link);
+		}
+
+		write_unlock(&afs_cells_lock);
+
+		if (cell) {
+			_debug("PURGING CELL %s (%d)",cell->name,atomic_read(&cell->usage));
+
+			if (!list_empty(&cell->sv_list)) BUG();
+			if (!list_empty(&cell->vl_list)) BUG();
+
+			/* purge the cell's VL graveyard list */
+			_debug(" - clearing VL graveyard");
+
+			spin_lock(&cell->vl_gylock);
+
+			while (!list_empty(&cell->vl_graveyard)) {
+				vlocation = list_entry(cell->vl_graveyard.next,
+						       afs_vlocation_t,link);
+				list_del_init(&vlocation->link);
+
+				afs_kafstimod_del_timer(&vlocation->timeout);
+
+				spin_unlock(&cell->vl_gylock);
+
+				afs_vlocation_do_timeout(vlocation);
+				/* TODO: race if move to use krxtimod instead of kafstimod */
+
+				spin_lock(&cell->vl_gylock);
+			}
+
+			spin_unlock(&cell->vl_gylock);
+
+			/* purge the cell's server graveyard list */
+			_debug(" - clearing server graveyard");
+
+			spin_lock(&cell->sv_gylock);
+
+			while (!list_empty(&cell->sv_graveyard)) {
+				afs_server_t *server;
+
+				server = list_entry(cell->sv_graveyard.next,afs_server_t,link);
+				list_del_init(&server->link);
+
+				afs_kafstimod_del_timer(&server->timeout);
+
+				spin_unlock(&cell->sv_gylock);
+
+				afs_server_do_timeout(server);
+
+				spin_lock(&cell->sv_gylock);
+			}
+
+			spin_unlock(&cell->sv_gylock);
+
+			/* now the cell should be left with no references */
+			afs_cell_destroy(cell);
+		}
+	}
+
+	_leave("");
+} /* end afs_cell_purge() */
diff --git a/fs/afs/cell.h b/fs/afs/cell.h
new file mode 100644
index 000000000000..48eb9fa91f19
--- /dev/null
+++ b/fs/afs/cell.h
@@ -0,0 +1,63 @@
+/* cell.h: AFS cell record
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_CELL_H
+#define _LINUX_AFS_CELL_H
+
+#include "types.h"
+
+extern volatile int afs_cells_being_purged; /* T when cells are being purged by rmmod */
+
+/*****************************************************************************/
+/*
+ * AFS cell record
+ */
+struct afs_cell
+{
+	atomic_t		usage;
+	struct list_head	link;		/* main cell list link */
+	struct list_head	proc_link;	/* /proc cell list link */
+	struct proc_dir_entry	*proc_dir;	/* /proc dir for this cell */
+	struct list_head	caches;		/* list of caches currently backing this cell */
+
+	/* server record management */
+	rwlock_t		sv_lock;	/* active server list lock */
+	struct list_head	sv_list;	/* active server list */
+	struct list_head	sv_graveyard;	/* inactive server list */
+	spinlock_t		sv_gylock;	/* inactive server list lock */
+
+	/* volume location record management */
+	struct rw_semaphore	vl_sem;		/* volume management serialisation semaphore */
+	struct list_head	vl_list;	/* cell's active VL record list */
+	struct list_head	vl_graveyard;	/* cell's inactive VL record list */
+	spinlock_t		vl_gylock;	/* graveyard lock */
+	unsigned short		vl_naddrs;	/* number of VL servers in addr list */
+	unsigned short		vl_curr_svix;	/* current server index */
+	struct in_addr		vl_addrs[16];	/* cell VL server addresses */
+
+	char			name[0];	/* cell name - must go last */
+};
+
+extern int afs_cell_init(void);
+
+extern int afs_cell_create(const char *name, char *vllist, afs_cell_t **_cell);
+
+extern int afs_cell_lookup(const char *name, afs_cell_t **_cell);
+
+#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
+
+extern afs_cell_t *afs_get_cell_maybe(afs_cell_t **_cell);
+
+extern void afs_put_cell(afs_cell_t *cell);
+
+extern void afs_cell_purge(void);
+
+#endif /* _LINUX_AFS_CELL_H */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
new file mode 100644
index 000000000000..b95c3625257a
--- /dev/null
+++ b/fs/afs/cmservice.c
@@ -0,0 +1,639 @@
+/* cmservice.c: AFS Cache Manager Service
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include "server.h"
+#include "cell.h"
+#include "transport.h"
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include "cmservice.h"
+#include "internal.h"
+
+static unsigned afscm_usage;		/* AFS cache manager usage count (protected by afscm_sem) */
+static DECLARE_RWSEM(afscm_sem);	/* AFS cache manager start/stop semaphore (statically initialised) */
+
+static int afscm_new_call(struct rxrpc_call *call);
+static void afscm_attention(struct rxrpc_call *call);
+static void afscm_error(struct rxrpc_call *call);
+static void afscm_aemap(struct rxrpc_call *call);
+
+static void _SRXAFSCM_CallBack(struct rxrpc_call *call);
+static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call);
+static void _SRXAFSCM_Probe(struct rxrpc_call *call);
+
+typedef void (*_SRXAFSCM_xxxx_t)(struct rxrpc_call *call);
+
+static const struct rxrpc_operation AFSCM_ops[] = {
+	{
+		.id	= 204,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "CallBack",
+		.user	= _SRXAFSCM_CallBack,
+	},
+	{
+		.id	= 205,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "InitCallBackState",
+		.user	= _SRXAFSCM_InitCallBackState,
+	},
+	{
+		.id	= 206,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "Probe",
+		.user	= _SRXAFSCM_Probe,
+	},
+#if 0
+	{
+		.id	= 207,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "GetLock",
+		.user	= _SRXAFSCM_GetLock,
+	},
+	{
+		.id	= 208,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "GetCE",
+		.user	= _SRXAFSCM_GetCE,
+	},
+	{
+		.id	= 209,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "GetXStatsVersion",
+		.user	= _SRXAFSCM_GetXStatsVersion,
+	},
+	{
+		.id	= 210,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "GetXStats",
+		.user	= _SRXAFSCM_GetXStats,
+	}
+#endif
+};
+
+static struct rxrpc_service AFSCM_service = {
+	.name		= "AFS/CM",
+	.owner		= THIS_MODULE,
+	.link		= LIST_HEAD_INIT(AFSCM_service.link),
+	.new_call	= afscm_new_call,
+	.service_id	= 1,
+	.attn_func	= afscm_attention,
+	.error_func	= afscm_error,
+	.aemap_func	= afscm_aemap,
+	.ops_begin	= &AFSCM_ops[0],
+	.ops_end	= &AFSCM_ops[sizeof(AFSCM_ops)/sizeof(AFSCM_ops[0])],
+};
+
+static DECLARE_COMPLETION(kafscmd_alive);
+static DECLARE_COMPLETION(kafscmd_dead);
+static DECLARE_WAIT_QUEUE_HEAD(kafscmd_sleepq);
+static LIST_HEAD(kafscmd_attention_list);
+static LIST_HEAD(afscm_calls);
+static spinlock_t afscm_calls_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t kafscmd_attention_lock = SPIN_LOCK_UNLOCKED;
+static int kafscmd_die;
+
+/*****************************************************************************/
+/*
+ * AFS Cache Manager kernel thread
+ */
+static int kafscmd(void *arg)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_call *call;
+	_SRXAFSCM_xxxx_t func;
+	int die;
+
+	printk("kAFS: Started kafscmd %d\n",current->pid);
+	strcpy(current->comm,"kafscmd");
+
+	daemonize();
+
+	complete(&kafscmd_alive);
+
+	/* only certain signals are of interest */
+	spin_lock_irq(&current->sig->siglock);
+	siginitsetinv(&current->blocked,0);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,3)
+	recalc_sigpending();
+#else
+	recalc_sigpending(current);
+#endif
+	spin_unlock_irq(&current->sig->siglock);
+
+	/* loop around looking for things to attend to */
+	do {
+		if (list_empty(&kafscmd_attention_list)) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			add_wait_queue(&kafscmd_sleepq,&myself);
+
+			for (;;) {
+				set_current_state(TASK_INTERRUPTIBLE);
+				if (!list_empty(&kafscmd_attention_list) ||
+				    signal_pending(current) ||
+				    kafscmd_die)
+					break;
+
+				schedule();
+			}
+
+			remove_wait_queue(&kafscmd_sleepq,&myself);
+			set_current_state(TASK_RUNNING);
+		}
+
+		die = kafscmd_die;
+
+		/* dequeue the next call requiring attention */
+		call = NULL;
+		spin_lock(&kafscmd_attention_lock);
+
+		if (!list_empty(&kafscmd_attention_list)) {
+			call = list_entry(kafscmd_attention_list.next,
+					  struct rxrpc_call,
+					  app_attn_link);
+			list_del_init(&call->app_attn_link);
+			die = 0;
+		}
+
+		spin_unlock(&kafscmd_attention_lock);
+
+		if (call) {
+			/* act upon it */
+			_debug("@@@ Begin Attend Call %p",call);
+
+			func = call->app_user;
+			if (func)
+				func(call);
+
+			rxrpc_put_call(call);
+
+			_debug("@@@ End Attend Call %p",call);
+		}
+
+	} while(!die);
+
+	/* and that's all */
+	complete_and_exit(&kafscmd_dead,0);
+
+} /* end kafscmd() */
+
+/*****************************************************************************/
+/*
+ * handle a call coming in to the cache manager
+ * - if I want to keep the call, I must increment its usage count
+ * - the return value will be negated and passed back in an abort packet if non-zero
+ * - serialised by virtue of there only being one krxiod
+ */
+static int afscm_new_call(struct rxrpc_call *call)
+{
+	_enter("%p{cid=%u u=%d}",call,ntohl(call->call_id),atomic_read(&call->usage));
+
+	rxrpc_get_call(call);
+
+	/* add to my current call list */
+	spin_lock(&afscm_calls_lock);
+	list_add(&call->app_link,&afscm_calls);
+	spin_unlock(&afscm_calls_lock);
+
+	_leave(" = 0");
+	return 0;
+
+} /* end afscm_new_call() */
+
+/*****************************************************************************/
+/*
+ * queue on the kafscmd queue for attention
+ */
+static void afscm_attention(struct rxrpc_call *call)
+{
+	_enter("%p{cid=%u u=%d}",call,ntohl(call->call_id),atomic_read(&call->usage));
+
+	spin_lock(&kafscmd_attention_lock);
+
+	if (list_empty(&call->app_attn_link)) {
+		list_add_tail(&call->app_attn_link,&kafscmd_attention_list);
+		rxrpc_get_call(call);
+	}
+
+	spin_unlock(&kafscmd_attention_lock);
+
+	wake_up(&kafscmd_sleepq);
+
+	_leave(" {u=%d}",atomic_read(&call->usage));
+} /* end afscm_attention() */
+
+/*****************************************************************************/
+/*
+ * handle my call being aborted
+ * - clean up, dequeue and put my ref to the call
+ */
+static void afscm_error(struct rxrpc_call *call)
+{
+	int removed;
+
+	_enter("%p{est=%s ac=%u er=%d}",
+	       call,
+	       rxrpc_call_error_states[call->app_err_state],
+	       call->app_abort_code,
+	       call->app_errno);
+
+	spin_lock(&kafscmd_attention_lock);
+
+	if (list_empty(&call->app_attn_link)) {
+		list_add_tail(&call->app_attn_link,&kafscmd_attention_list);
+		rxrpc_get_call(call);
+	}
+
+	spin_unlock(&kafscmd_attention_lock);
+
+	removed = 0;
+	spin_lock(&afscm_calls_lock);
+	if (!list_empty(&call->app_link)) {
+		list_del_init(&call->app_link);
+		removed = 1;
+	}
+	spin_unlock(&afscm_calls_lock);
+
+	if (removed)
+		rxrpc_put_call(call);
+
+	wake_up(&kafscmd_sleepq);
+
+	_leave("");
+} /* end afscm_error() */
+
+/*****************************************************************************/
+/*
+ * map afs abort codes to/from Linux error codes
+ * - called with call->lock held
+ */
+static void afscm_aemap(struct rxrpc_call *call)
+{
+	switch (call->app_err_state) {
+	case RXRPC_ESTATE_LOCAL_ABORT:
+		call->app_abort_code = -call->app_errno;
+		break;
+	case RXRPC_ESTATE_PEER_ABORT:
+		call->app_errno = -ECONNABORTED;
+		break;
+	default:
+		break;
+	}
+} /* end afscm_aemap() */
+
+/*****************************************************************************/
+/*
+ * start the cache manager service if not already started; returns 0 or a negative error
+ */
+int afscm_start(void)
+{
+	int ret;
+
+	down_write(&afscm_sem);
+	if (!afscm_usage) {
+		/* a previous stop (or failed start) leaves the die flag set; clear it or
+		 * the new daemon would exit as soon as it had nothing to attend to */
+		kafscmd_die = 0;
+		ret = kernel_thread(kafscmd,NULL,0);
+		if (ret<0)
+			goto out;
+
+		wait_for_completion(&kafscmd_alive);
+
+		ret = rxrpc_add_service(afs_transport,&AFSCM_service);
+		if (ret<0)
+			goto kill;
+	}
+
+	afscm_usage++;
+	up_write(&afscm_sem);
+	return 0;
+
+ kill:
+	kafscmd_die = 1;
+	wake_up(&kafscmd_sleepq);
+	wait_for_completion(&kafscmd_dead);
+ out:
+	up_write(&afscm_sem);
+	return ret;
+} /* end afscm_start() */
+
+/*****************************************************************************/
+/*
+ * stop the cache manager service (drops one usage; the last user tears everything down)
+ */
+void afscm_stop(void)
+{
+	struct rxrpc_call *call;
+
+	down_write(&afscm_sem);
+
+	if (afscm_usage==0) BUG();
+	afscm_usage--;
+
+	if (afscm_usage==0) {
+		/* don't want more incoming calls */
+		rxrpc_del_service(afs_transport,&AFSCM_service);
+
+		/* abort any calls I've still got open (the afscm_error() will dequeue them) */
+		spin_lock(&afscm_calls_lock);
+		while (!list_empty(&afscm_calls)) {
+			call = list_entry(afscm_calls.next,struct rxrpc_call,app_link);
+			list_del_init(&call->app_link);
+			rxrpc_get_call(call);	/* temporary ref so the call survives while the lock is dropped */
+			spin_unlock(&afscm_calls_lock);
+			/* NOTE(review): app_link is already unlinked here, so afscm_error() will skip its put - confirm the afscm_new_call() ref is dropped somewhere */
+			rxrpc_call_abort(call,-ESRCH); /* abort, dequeue and put */
+
+			rxrpc_put_call(call);	/* balances the rxrpc_get_call() above */
+
+			spin_lock(&afscm_calls_lock);
+		}
+		spin_unlock(&afscm_calls_lock);
+
+		/* get rid of my daemon */
+		kafscmd_die = 1;
+		wake_up(&kafscmd_sleepq);
+		wait_for_completion(&kafscmd_dead);
+
+		/* dispose of any calls waiting for attention */
+		spin_lock(&kafscmd_attention_lock);
+		while (!list_empty(&kafscmd_attention_list)) {
+			call = list_entry(kafscmd_attention_list.next,
+					  struct rxrpc_call,
+					  app_attn_link);
+
+			list_del_init(&call->app_attn_link);
+			spin_unlock(&kafscmd_attention_lock);
+
+			rxrpc_put_call(call);	/* drop the ref taken when the call was queued for attention */
+
+			spin_lock(&kafscmd_attention_lock);
+		}
+		spin_unlock(&kafscmd_attention_lock);
+	}
+
+	up_write(&afscm_sem);
+
+} /* end afscm_stop() */
+
+/*****************************************************************************/
+/*
+ * handle the fileserver breaking a set of callbacks (kafscmd attention handler for op 204)
+ */
+static void _SRXAFSCM_CallBack(struct rxrpc_call *call)
+{
+	afs_server_t *server;
+	size_t count, qty, tmp;
+	int ret = 0, removed;
+
+	_enter("%p{acs=%s}",call,rxrpc_call_states[call->app_call_state]);
+
+	server = afs_server_get_from_peer(call->conn->peer);
+
+	switch (call->app_call_state) {
+		/* we've received the last packet
+		 * - drain all the data from the call and send the reply
+		 */
+	case RXRPC_CSTATE_SRVR_GOT_ARGS:
+		ret = -EBADMSG;
+		qty = call->app_ready_qty;
+		if (qty<8 || qty>50*(6*4)+8)	/* two count words, plus at most 50 fid+callback pairs */
+			break;
+
+		{
+			afs_callback_t *cb, *pcb;
+			int loop;
+			u32 *fp, *bp;	/* cursors over the fid array and the callback array */
+
+			fp = rxrpc_call_alloc_scratch(call,qty);
+
+			/* drag the entire argument block out to the scratch space */
+			ret = rxrpc_call_read_data(call,fp,qty,0);
+			if (ret<0)
+				break;
+
+			/* and unmarshall the parameter block */
+			ret = -EBADMSG;
+			count = ntohl(*fp++);
+			if (count>AFSCBMAX ||
+			    (count*(3*4)+8 != qty && count*(6*4)+8 != qty))
+				break;
+			/* a non-empty callback array must match the fid count and lie within the data read */
+			bp = fp + count*3;
+			tmp = ntohl(*bp++);
+			if (tmp>0 && (tmp!=count || count*(6*4)+8 != qty))
+				break;
+			if (tmp==0)
+				bp = NULL;
+			/* NOTE(review): this looks like it reserves one afs_callback_t, yet "count" are written below - confirm */
+			pcb = cb = rxrpc_call_alloc_scratch_s(call,afs_callback_t);
+
+			for (loop=count-1; loop>=0; loop--) {
+				pcb->fid.vid	= ntohl(*fp++);
+				pcb->fid.vnode	= ntohl(*fp++);
+				pcb->fid.unique	= ntohl(*fp++);
+				if (bp) {
+					pcb->version	= ntohl(*bp++);
+					pcb->expiry	= ntohl(*bp++);
+					pcb->type	= ntohl(*bp++);
+				}
+				else {
+					pcb->version	= 0;
+					pcb->expiry	= 0;
+					pcb->type	= AFSCM_CB_UNTYPED;
+				}
+				pcb++;
+			}
+
+			/* invoke the actual service routine */
+			ret = SRXAFSCM_CallBack(server,count,cb);
+			if (ret<0)
+				break;
+		}
+
+		/* send the reply */
+		ret = rxrpc_call_write_data(call,0,NULL,RXRPC_LAST_PACKET,GFP_KERNEL,0,&count);
+		if (ret<0)
+			break;
+		break;
+
+		/* operation complete */
+	case RXRPC_CSTATE_COMPLETE:
+		call->app_user = NULL;
+		removed = 0;
+		spin_lock(&afscm_calls_lock);
+		if (!list_empty(&call->app_link)) {
+			list_del_init(&call->app_link);
+			removed = 1;
+		}
+		spin_unlock(&afscm_calls_lock);
+
+		if (removed)	/* only put if this path was the one to unlink the call */
+			rxrpc_put_call(call);
+		break;
+
+		/* operation terminated on error */
+	case RXRPC_CSTATE_ERROR:
+		call->app_user = NULL;
+		break;
+
+	default:
+		break;
+	}
+
+	if (ret<0)
+		rxrpc_call_abort(call,ret);
+
+	if (server) afs_put_server(server);
+
+	_leave(" = %d",ret);
+
+} /* end _SRXAFSCM_CallBack() */
+
+/*****************************************************************************/
+/*
+ * handle the fileserver asking us to initialise our callback state
+ */
+static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call)
+{
+	afs_server_t *server;
+	size_t count;
+	int ret = 0, removed;
+
+	_enter("%p{acs=%s}",call,rxrpc_call_states[call->app_call_state]);
+
+	server = afs_server_get_from_peer(call->conn->peer);
+
+	switch (call->app_call_state) {
+		/* we've received the last packet - drain all the data from the call */
+	case RXRPC_CSTATE_SRVR_GOT_ARGS:
+		/* shouldn't be any args */
+		ret = -EBADMSG;
+		break;
+
+		/* send the reply when asked for it */
+	case RXRPC_CSTATE_SRVR_SND_REPLY:
+		/* invoke the actual service routine */
+		ret = SRXAFSCM_InitCallBackState(server);
+		if (ret<0)
+			break;
+
+		ret = rxrpc_call_write_data(call,0,NULL,RXRPC_LAST_PACKET,GFP_KERNEL,0,&count);
+		if (ret<0)
+			break;
+		break;
+
+		/* operation complete */
+	case RXRPC_CSTATE_COMPLETE:
+		call->app_user = NULL;
+		removed = 0;
+		spin_lock(&afscm_calls_lock);
+		if (!list_empty(&call->app_link)) {
+			list_del_init(&call->app_link);
+			removed = 1;
+		}
+		spin_unlock(&afscm_calls_lock);
+
+		if (removed)
+			rxrpc_put_call(call);
+		break;
+
+		/* operation terminated on error */
+	case RXRPC_CSTATE_ERROR:
+		call->app_user = NULL;
+		break;
+
+	default:
+		break;
+	}
+
+	if (ret<0)
+		rxrpc_call_abort(call,ret);
+
+	if (server) afs_put_server(server);
+
+	_leave(" = %d",ret);
+
+} /* end _SRXAFSCM_InitCallBackState() */
+
+/*****************************************************************************/
+/*
+ * handle a probe from a fileserver
+ */
+static void _SRXAFSCM_Probe(struct rxrpc_call *call)
+{
+	afs_server_t *server;
+	size_t count;
+	int ret = 0, removed;
+
+	_enter("%p{acs=%s}",call,rxrpc_call_states[call->app_call_state]);
+
+	server = afs_server_get_from_peer(call->conn->peer);
+
+	switch (call->app_call_state) {
+		/* we've received the last packet - drain all the data from the call */
+	case RXRPC_CSTATE_SRVR_GOT_ARGS:
+		/* shouldn't be any args */
+		ret = -EBADMSG;
+		break;
+
+		/* send the reply when asked for it */
+	case RXRPC_CSTATE_SRVR_SND_REPLY:
+		/* invoke the actual service routine */
+		ret = SRXAFSCM_Probe(server);
+		if (ret<0)
+			break;
+
+		ret = rxrpc_call_write_data(call,0,NULL,RXRPC_LAST_PACKET,GFP_KERNEL,0,&count);
+		if (ret<0)
+			break;
+		break;
+
+		/* operation complete */
+	case RXRPC_CSTATE_COMPLETE:
+		call->app_user = NULL;
+		removed = 0;
+		spin_lock(&afscm_calls_lock);
+		if (!list_empty(&call->app_link)) {
+			list_del_init(&call->app_link);
+			removed = 1;
+		}
+		spin_unlock(&afscm_calls_lock);
+
+		if (removed)
+			rxrpc_put_call(call);
+		break;
+
+		/* operation terminated on error */
+	case RXRPC_CSTATE_ERROR:
+		call->app_user = NULL;
+		break;
+
+	default:
+		break;
+	}
+
+	if (ret<0)
+		rxrpc_call_abort(call,ret);
+
+	if (server) afs_put_server(server);
+
+	_leave(" = %d",ret);
+
+} /* end _SRXAFSCM_Probe() */
diff --git a/fs/afs/cmservice.h b/fs/afs/cmservice.h
new file mode 100644
index 000000000000..89fb14e7615b
--- /dev/null
+++ b/fs/afs/cmservice.h
@@ -0,0 +1,27 @@
+/* cmservice.h: AFS Cache Manager Service declarations
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_CMSERVICE_H
+#define _LINUX_AFS_CMSERVICE_H
+
+#include <rxrpc/transport.h>
+#include "types.h"
+
+/* cache manager start/stop */
+extern int afscm_start(void);
+extern void afscm_stop(void);
+
+/* cache manager server functions */
+extern int SRXAFSCM_InitCallBackState(afs_server_t *server);
+extern int SRXAFSCM_CallBack(afs_server_t *server, size_t count, afs_callback_t callbacks[]);
+extern int SRXAFSCM_Probe(afs_server_t *server);
+
+#endif /* _LINUX_AFS_CMSERVICE_H */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
new file mode 100644
index 000000000000..d39345f4e277
--- /dev/null
+++ b/fs/afs/dir.c
@@ -0,0 +1,642 @@
+/* dir.c: AFS filesystem directory handling
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include "vnode.h"
+#include "volume.h"
+#include <rxrpc/call.h>
+#include "super.h"
+#include "internal.h"
+
+static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry);
+static int afs_dir_open(struct inode *inode, struct file *file);
+static int afs_dir_readdir(struct file *file, void *dirent, filldir_t filldir);
+static int afs_d_revalidate(struct dentry *dentry, int flags);
+static int afs_d_delete(struct dentry *dentry);
+static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos,
+				     ino_t ino, unsigned dtype);
+
+struct file_operations afs_dir_file_operations = {
+	.open		= afs_dir_open,
+	.readdir	= afs_dir_readdir,
+};
+
+struct inode_operations afs_dir_inode_operations = {
+	.lookup		= afs_dir_lookup,
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	.getattr	= afs_inode_getattr,
+#else
+	.revalidate	= afs_inode_revalidate,
+#endif
+//	.create		= afs_dir_create,
+//	.link		= afs_dir_link,
+//	.unlink		= afs_dir_unlink,
+//	.symlink	= afs_dir_symlink,
+//	.mkdir		= afs_dir_mkdir,
+//	.rmdir		= afs_dir_rmdir,
+//	.mknod		= afs_dir_mknod,
+//	.rename		= afs_dir_rename,
+};
+
+static struct dentry_operations afs_fs_dentry_operations = {
+	.d_revalidate	= afs_d_revalidate,
+	.d_delete	= afs_d_delete,
+};
+
+#define AFS_DIR_HASHTBL_SIZE	128
+#define AFS_DIR_DIRENT_SIZE	32	/* bytes per dirent slot */
+#define AFS_DIRENT_PER_BLOCK	64	/* 64 slots * 32 bytes = one 2048-byte block */
+
+typedef union afs_dirent {	/* one 32-byte directory slot (size is BUG()-checked at open/lookup) */
+	struct {
+		u8	valid;
+		u8	unused[1];
+		u16	hash_next;
+		u32	vnode;		/* vnode number; readers convert with ntohl() */
+		u32	unique;		/* FID uniquifier; readers convert with ntohl() */
+		u8	name[16];
+		u8	overflow[4];	/* if any char of the name (inc NUL) reaches here, consume
+					 * the next dirent too */
+	};
+	u8	extended_name[32];	/* presumably a slot used as name continuation - TODO confirm */
+} afs_dirent_t;
+
+/* AFS directory page header (one at the beginning of every 2048-byte chunk) */
+typedef struct afs_dir_pagehdr {
+	u16	npages;		/* presumably the dir block count (cf. the disabled check in afs_dir_check_page) */
+	u16	magic;		/* compared against AFS_DIR_MAGIC by afs_dir_check_page() */
+#define AFS_DIR_MAGIC htons(1234)
+	u8	nentries;	/* iteration starts at slot AFS_DIRENT_PER_BLOCK - nentries */
+	u8	bitmap[8];	/* one bit per dirent slot; a set bit marks the slot as in use */
+	u8	pad[19];	/* brings the header up to 32 bytes, the size of one dirent slot */
+} afs_dir_pagehdr_t;
+
+/* directory block layout (the page header occupies the same bytes as dirents[0]) */
+typedef union afs_dir_block {
+
+	afs_dir_pagehdr_t pagehdr;
+
+	struct {
+		afs_dir_pagehdr_t pagehdr;
+		u8		alloc_ctrs[128];
+		u16		hashtable[AFS_DIR_HASHTBL_SIZE]; /* dir hash table */
+	} hdr;	/* NOTE(review): extended header view; not referenced by the code in this file */
+
+	afs_dirent_t dirents[AFS_DIRENT_PER_BLOCK];
+} afs_dir_block_t;
+
+/* layout on a linux VM page */
+typedef struct afs_dir_page {
+	afs_dir_block_t	blocks[PAGE_SIZE/sizeof(afs_dir_block_t)];
+} afs_dir_page_t;
+
+struct afs_dir_lookup_cookie {	/* search state passed to afs_dir_lookup_filldir() */
+	afs_fid_t	fid;	/* vid preset by the caller; vnode/unique filled in on a match */
+	const char	*name;	/* name being searched for */
+	size_t		nlen;	/* length of that name */
+	int		found;	/* set to 1 on a match */
+};
+
+/*****************************************************************************/
+/*
+ * check that a directory page is valid; the verdict is left in the Checked/Error page flags
+ */
+static inline void afs_dir_check_page(struct inode *dir, struct page *page)
+{
+	afs_dir_page_t *dbuf;
+	loff_t latter;
+	int tmp, qty;
+
+#if 0
+	/* check the page count */
+	qty = desc.size/sizeof(dbuf->blocks[0]);
+	if (qty==0)
+		goto error;
+
+	if (page->index==0 && qty!=ntohs(dbuf->blocks[0].pagehdr.npages)) {
+		printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
+		       __FUNCTION__,dir->i_ino,qty,ntohs(dbuf->blocks[0].pagehdr.npages));
+		goto error;
+	}
+#endif
+
+	/* determine how many magic numbers there should be in this page (shift done in 64 bits) */
+	latter = dir->i_size - ((loff_t) page->index << PAGE_CACHE_SHIFT);
+	if (latter >= (loff_t) PAGE_SIZE)
+		qty = PAGE_SIZE;
+	else
+		qty = latter < 0 ? 0 : latter; /* page beyond i_size: nothing to check */
+	qty /= sizeof(afs_dir_block_t);
+
+	/* check the magic number at the head of each block that i_size says is present */
+	dbuf = page_address(page);
+	for (tmp=0; tmp<qty; tmp++) {
+		if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
+			printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
+			       __FUNCTION__,dir->i_ino,tmp,
+			       qty,ntohs(dbuf->blocks[tmp].pagehdr.magic));
+			goto error;
+		}
+	}
+
+	SetPageChecked(page);
+	return;
+
+ error:
+	SetPageChecked(page);	/* still marked checked so the page is not re-examined */
+	SetPageError(page);
+
+} /* end afs_dir_check_page() */
+
+/*****************************************************************************/
+/*
+ * release a directory page: undo the kmap and drop the page reference
+ */
+static inline void afs_dir_put_page(struct page *page)
+{
+	kunmap(page);			/* pairs with the kmap() in afs_dir_get_page() */
+	page_cache_release(page);
+
+} /* end afs_dir_put_page() */
+
+/*****************************************************************************/
+/*
+ * get a directory page, kmapped and validated; returns the page or an ERR_PTR() value
+ */
+static struct page *afs_dir_get_page(struct inode *dir, unsigned long index)
+{
+	struct page *page;
+
+	_enter("{%lu},%lu",dir->i_ino,index);
+
+	page = read_cache_page(dir->i_mapping,index,
+			       (filler_t*)dir->i_mapping->a_ops->readpage,NULL);
+	if (!IS_ERR(page)) {
+		wait_on_page_locked(page);
+		kmap(page);	/* undone by afs_dir_put_page() */
+		if (!PageUptodate(page))
+			goto fail;
+		if (!PageChecked(page))
+			afs_dir_check_page(dir,page);	/* sets PageChecked, plus PageError if bad */
+		if (PageError(page))
+			goto fail;
+	}
+	return page;	/* may itself be the ERR_PTR() from read_cache_page() */
+
+ fail:
+	afs_dir_put_page(page);
+	return ERR_PTR(-EIO);
+} /* end afs_dir_get_page() */
+
+/*****************************************************************************/
+/*
+ * open an AFS directory file; fails with -ENOENT if the vnode is flagged as deleted
+ */
+static int afs_dir_open(struct inode *inode, struct file *file)
+{
+	_enter("{%lu}",inode->i_ino);
+
+	if (sizeof(afs_dir_block_t) != 2048) BUG();	/* layout sanity checks */
+	if (sizeof(afs_dirent_t) != 32) BUG();
+
+	if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED)
+		return -ENOENT;
+
+	_leave(" = 0");
+	return 0;
+
+} /* end afs_dir_open() */
+
+/*****************************************************************************/
+/*
+ * deal with one block in an AFS directory: 1 = carry on, 0 = filldir stopped us, -EIO = corrupt
+ */
+static int afs_dir_iterate_block(unsigned *fpos,
+				    afs_dir_block_t *block,
+				    unsigned blkoff,
+				    void *cookie,
+				    filldir_t filldir)
+{
+	afs_dirent_t *dire;
+	unsigned offset, next, curr;
+	size_t nlen;
+	int tmp, ret;
+
+	_enter("%u,%x,%p,,",*fpos,blkoff,block);
+
+	curr = (*fpos - blkoff) / sizeof(afs_dirent_t); /* slot index at which to resume */
+
+	/* walk through the block, an entry at a time */
+	for (offset = AFS_DIRENT_PER_BLOCK - block->pagehdr.nentries;
+	     offset < AFS_DIRENT_PER_BLOCK;
+	     offset = next
+	     ) {
+		next = offset + 1;
+
+		/* skip entries marked unused in the bitmap */
+		if (!(block->pagehdr.bitmap[offset/8] & (1 << (offset % 8)))) {
+			_debug("ENT[%u.%u]: unused\n",blkoff/sizeof(afs_dir_block_t),offset);
+			if (offset>=curr)
+				*fpos = blkoff + next * sizeof(afs_dirent_t);
+			continue;
+		}
+
+		/* got a valid entry - measure its name, never scanning past the end of this block */
+		dire = &block->dirents[offset];
+		nlen = strnlen(dire->name,(u8*)(block+1) - dire->name);
+
+		_debug("ENT[%u.%u]: %s %u \"%.*s\"\n",
+		       blkoff/sizeof(afs_dir_block_t),offset,
+		       offset<curr ? "skip" : "fill",
+		       nlen,nlen,dire->name);
+
+		/* work out where the next possible entry is (long names take over following slots) */
+		for (tmp=nlen; tmp>15; tmp-=sizeof(afs_dirent_t)) {
+			if (next>=AFS_DIRENT_PER_BLOCK) {
+				_debug("ENT[%u.%u]:"
+				       " %u travelled beyond end dir block (len %u/%u)\n",
+				       blkoff/sizeof(afs_dir_block_t),offset,next,tmp,nlen);
+				return -EIO;
+			}
+			if (!(block->pagehdr.bitmap[next/8] & (1 << (next % 8)))) {
+				_debug("ENT[%u.%u]: %u unmarked extension (len %u/%u)\n",
+				       blkoff/sizeof(afs_dir_block_t),offset,next,tmp,nlen);
+				return -EIO;
+			}
+
+			_debug("ENT[%u.%u]: ext %u/%u\n",
+			       blkoff/sizeof(afs_dir_block_t),next,tmp,nlen);
+			next++;
+		}
+
+		/* skip if starts before the current position */
+		if (offset<curr)
+			continue;
+
+		/* found the next entry - the lookup callback gets the raw uniquifier via dtype */
+		ret = filldir(cookie,
+			      dire->name,
+			      nlen,
+			      blkoff + offset * sizeof(afs_dirent_t),
+			      ntohl(dire->vnode),
+			      filldir==afs_dir_lookup_filldir ? dire->unique : DT_UNKNOWN);
+		if (ret<0) {
+			_leave(" = 0 [full]");
+			return 0;
+		}
+
+		*fpos = blkoff + next * sizeof(afs_dirent_t); /* resume after this entry */
+	}
+
+	_leave(" = 1 [more]");
+	return 1;
+} /* end afs_dir_iterate_block() */
+
+/*****************************************************************************/
+/*
+ * walk an AFS directory from *fpos, passing each entry to filldir; returns 0 or -ve error
+ */
+static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, filldir_t filldir)
+{
+	afs_dir_block_t	*dblock;
+	afs_dir_page_t *dbuf;
+	struct page *page;
+	unsigned blkoff, limit;
+	int ret;
+
+	_enter("{%lu},%u,,",dir->i_ino,*fpos);
+
+	if (AFS_FS_I(dir)->flags & AFS_VNODE_DELETED) {
+		_leave(" = -ESTALE");
+		return -ESTALE;
+	}
+
+	/* round the file position up to the next entry boundary */
+	*fpos += sizeof(afs_dirent_t) - 1;
+	*fpos &= ~(sizeof(afs_dirent_t) - 1);
+
+	/* walk through the blocks in sequence */
+	ret = 0;
+	while (*fpos < dir->i_size) {
+		blkoff = *fpos & ~(sizeof(afs_dir_block_t) - 1); /* start of the block holding *fpos */
+
+		/* fetch the appropriate page from the directory */
+		page = afs_dir_get_page(dir,blkoff/PAGE_SIZE);
+		if (IS_ERR(page)) {
+			ret = PTR_ERR(page);
+			break;
+		}
+
+		limit = blkoff & ~(PAGE_SIZE-1);
+		/* NOTE(review): limit is the page START, so "blkoff < limit" below never holds - confirm intent */
+		dbuf = page_address(page);
+
+		/* deal with the individual blocks stashed on this page */
+		do {
+			dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / sizeof(afs_dir_block_t)];
+			ret = afs_dir_iterate_block(fpos,dblock,blkoff,cookie,filldir);
+			if (ret!=1) {	/* 0 = filldir stopped, -ve = error: either way we are done */
+				afs_dir_put_page(page);
+				goto out;
+			}
+
+			blkoff += sizeof(afs_dir_block_t);
+
+		} while (*fpos < dir->i_size && blkoff < limit);
+
+		afs_dir_put_page(page);
+		ret = 0;
+	}
+
+ out:
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_dir_iterate() */
+
+/*****************************************************************************/
+/*
+ * readdir file operation: iterate the directory from f_pos and store the new position back
+ */
+static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir)
+{
+	unsigned fpos;
+	int ret;
+
+	_enter("{%Ld,{%lu}}",file->f_pos,file->f_dentry->d_inode->i_ino);
+
+	fpos = file->f_pos;	/* afs_dir_iterate() works on an unsigned position */
+	ret = afs_dir_iterate(file->f_dentry->d_inode,&fpos,cookie,filldir);
+	file->f_pos = fpos;	/* written back even when ret is an error */
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_dir_readdir() */
+
+/*****************************************************************************/
+/*
+ * filldir callback that searches the directory for the name held in the lookup cookie
+ * - if afs_dir_iterate_block() spots this function, it'll pass the FID uniquifier through dtype
+ */
+static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos,
+				  ino_t ino, unsigned dtype)
+{
+	struct afs_dir_lookup_cookie *cookie = _cookie;
+
+	_enter("{%s,%u},%s,%u,,%lu,%u",cookie->name,cookie->nlen,name,nlen,ino,ntohl(dtype));
+
+	if (cookie->nlen != nlen || memcmp(cookie->name,name,nlen)!=0) {
+		_leave(" = 0 [no]");
+		return 0;	/* not this entry: let the iteration continue */
+	}
+
+	cookie->fid.vnode = ino;
+	cookie->fid.unique = ntohl(dtype);	/* dtype carries the uniquifier unconverted */
+	cookie->found = 1;
+
+	_leave(" = -1 [found]");
+	return -1;	/* a negative return makes afs_dir_iterate_block() stop */
+} /* end afs_dir_lookup_filldir() */
+
+/*****************************************************************************/
+/*
+ * look up an entry in a directory; returns NULL once the dentry is attached, else an ERR_PTR()
+ */
+static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry)
+{
+	struct afs_dir_lookup_cookie cookie;
+	struct afs_super_info *as;
+	struct inode *inode;
+	afs_vnode_t *vnode;
+	unsigned fpos;
+	int ret;
+
+	_enter("{%lu},{%s}",dir->i_ino,dentry->d_name.name);
+
+	/* insanity checks first */
+	if (sizeof(afs_dir_block_t) != 2048) BUG();
+	if (sizeof(afs_dirent_t) != 32) BUG();
+
+	if (dentry->d_name.len > 255) {
+		_leave(" = -ENAMETOOLONG");
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	vnode = AFS_FS_I(dir);
+	if (vnode->flags & AFS_VNODE_DELETED) {
+		_leave(" = -ESTALE");
+		return ERR_PTR(-ESTALE);
+	}
+
+	as = dir->i_sb->s_fs_info;
+
+	/* search the directory (the callback fills in fid.vnode and fid.unique on a match) */
+	cookie.name	= dentry->d_name.name;
+	cookie.nlen	= dentry->d_name.len;
+	cookie.fid.vid	= as->volume->vid;
+	cookie.found	= 0;
+
+	fpos = 0;
+	ret = afs_dir_iterate(dir,&fpos,&cookie,afs_dir_lookup_filldir);
+	if (ret<0) {
+		_leave(" = %d",ret);
+		return ERR_PTR(ret);
+	}
+
+	ret = -ENOENT;
+	if (!cookie.found) {
+		_leave(" = %d",ret);
+		return ERR_PTR(ret);
+	}
+
+	/* instantiate the dentry */
+	ret = afs_iget(dir->i_sb,&cookie.fid,&inode);
+	if (ret<0) {
+		_leave(" = %d",ret);
+		return ERR_PTR(ret);
+	}
+	/* stash the low 32 bits of the dir's data version for afs_d_revalidate() to compare */
+	dentry->d_op = &afs_fs_dentry_operations;
+	dentry->d_fsdata = (void*) (unsigned long) (unsigned) vnode->status.version;
+
+	d_add(dentry,inode);
+	_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }",
+	       cookie.fid.vnode,
+	       cookie.fid.unique,
+	       dentry->d_inode->i_ino,
+	       dentry->d_inode->i_version);
+
+	return NULL;
+} /* end afs_dir_lookup() */
+
+/*****************************************************************************/
+/*
+ * check that a dentry lookup hit has found a valid entry; returns 1 if valid, 0 if dropped
+ * - NOTE! the hit can be a negative hit too, so we can't assume we have an inode
+ * (derived from nfs_lookup_revalidate)
+ */
+static int afs_d_revalidate(struct dentry *dentry, int flags)
+{
+	struct afs_dir_lookup_cookie cookie;
+	struct dentry *parent;
+	struct inode *inode, *dir;
+	unsigned fpos;
+	int ret;
+
+	_enter("%s,%x",dentry->d_name.name,flags);
+
+	/* lock down the parent dentry so we can peer at it */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	read_lock(&dparent_lock);
+	parent = dget(dentry->d_parent);
+	read_unlock(&dparent_lock);
+#else
+	lock_kernel();
+	parent = dget(dentry->d_parent);
+	unlock_kernel();
+#endif
+
+	dir = parent->d_inode;
+	inode = dentry->d_inode;
+
+	/* handle a negative inode */
+	if (!inode)
+		goto out_bad;
+
+	/* handle a bad inode (names are taken from the pinned parent, not dentry->d_parent) */
+	if (is_bad_inode(inode)) {
+		printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
+		       parent->d_name.name,dentry->d_name.name);
+		goto out_bad;
+	}
+
+	/* force a full look up if the parent directory changed since last the server was consulted
+	 * - otherwise this inode must still exist, even if the inode details themselves have
+	 *   changed
+	 */
+	if (AFS_FS_I(dir)->flags & AFS_VNODE_CHANGED)
+		afs_vnode_fetch_status(AFS_FS_I(dir)); /* result ignored; flags are tested below */
+
+	if (AFS_FS_I(dir)->flags & AFS_VNODE_DELETED) {
+		_debug("%s: parent dir deleted",dentry->d_name.name);
+		goto out_bad;
+	}
+
+	if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED) {
+		_debug("%s: file already deleted",dentry->d_name.name);
+		goto out_bad;
+	}
+
+	if ((unsigned)(unsigned long)dentry->d_fsdata != (unsigned)AFS_FS_I(dir)->status.version) {
+		_debug("%s: parent changed %u -> %u",
+		       dentry->d_name.name,
+		       (unsigned)(unsigned long)dentry->d_fsdata,
+		       (unsigned)AFS_FS_I(dir)->status.version);
+
+		/* search the directory for this vnode */
+		cookie.name	= dentry->d_name.name;
+		cookie.nlen	= dentry->d_name.len;
+		cookie.fid.vid	= AFS_FS_I(inode)->volume->vid;
+		cookie.found	= 0;
+
+		fpos = 0;
+		ret = afs_dir_iterate(dir,&fpos,&cookie,afs_dir_lookup_filldir);
+		if (ret<0) {
+			_debug("failed to iterate dir %s: %d",parent->d_name.name,ret);
+			goto out_bad;
+		}
+
+		if (!cookie.found) {
+			_debug("%s: dirent not found",dentry->d_name.name);
+			goto not_found;
+		}
+
+		/* if the vnode ID has changed, then the dirent points to a different file */
+		if (cookie.fid.vnode!=AFS_FS_I(inode)->fid.vnode) {
+			_debug("%s: dirent changed",dentry->d_name.name);
+			goto not_found;
+		}
+
+		/* if the vnode ID uniquifier has changed, then the file has been deleted */
+		if (cookie.fid.unique!=AFS_FS_I(inode)->fid.unique) {
+			_debug("%s: file deleted (uq %u -> %u I:%lu)",
+			       dentry->d_name.name,
+			       cookie.fid.unique,
+			       AFS_FS_I(inode)->fid.unique,
+			       inode->i_version);
+			spin_lock(&AFS_FS_I(inode)->lock);
+			AFS_FS_I(inode)->flags |= AFS_VNODE_DELETED;
+			spin_unlock(&AFS_FS_I(inode)->lock);
+			invalidate_inode_pages(inode->i_mapping);
+			goto out_bad;
+		}
+
+		dentry->d_fsdata = (void*) (unsigned long) (unsigned) AFS_FS_I(dir)->status.version;
+	}
+
+ out_valid:
+	dput(parent);
+	_leave(" = 1 [valid]");
+	return 1;
+
+	/* the dirent, if it exists, now points to a different vnode */
+ not_found:
+	dentry->d_flags |= DCACHE_NFSFS_RENAMED;	/* afs_d_delete() zaps dentries so flagged */
+
+ out_bad:
+	if (inode) {
+		/* don't unhash if we have submounts */
+		if (have_submounts(dentry))
+			goto out_valid;
+	}
+
+	shrink_dcache_parent(dentry);
+
+	_debug("dropping dentry %s/%s",parent->d_name.name,dentry->d_name.name);
+	d_drop(dentry);
+
+	dput(parent);
+
+	_leave(" = 0 [bad]");
+	return 0;
+} /* end afs_d_revalidate() */
+
+/*****************************************************************************/
+/*
+ * allow the VFS to enquire as to whether a dentry should be unhashed (mustn't sleep)
+ * - called from dput() when d_count is going to 0.
+ * - return 1 to request dentry be unhashed, 0 otherwise
+ */
+static int afs_d_delete(struct dentry *dentry)
+{
+	_enter("%s",dentry->d_name.name);
+
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)	/* set by afs_d_revalidate() at not_found */
+		goto zap;
+
+	if (dentry->d_inode) {	/* a negative dentry has no vnode flags to consult */
+		if (AFS_FS_I(dentry->d_inode)->flags & AFS_VNODE_DELETED)
+			goto zap;
+	}
+
+	_leave(" = 0 [keep]");
+	return 0;
+
+ zap:
+	_leave(" = 1 [zap]");
+	return 1;
+} /* end afs_d_delete() */
diff --git a/fs/afs/errors.h b/fs/afs/errors.h
new file mode 100644
index 000000000000..115befe16450
--- /dev/null
+++ b/fs/afs/errors.h
@@ -0,0 +1,34 @@
+/* errors.h: AFS abort/error codes
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _H_DB712916_5113_11D6_9A6D_0002B3163499
+#define _H_DB712916_5113_11D6_9A6D_0002B3163499
+
+#include "types.h"
+
+/* file server abort codes */
+typedef enum {
+	VSALVAGE	= 101,	/* volume needs salvaging */
+	VNOVNODE	= 102,	/* no such file/dir (vnode) */
+	VNOVOL		= 103,	/* no such volume or volume unavailable */
+	VVOLEXISTS	= 104,	/* volume name already exists */
+	VNOSERVICE	= 105,	/* volume not currently in service */
+	VOFFLINE	= 106,	/* volume is currently offline (more info available [VVL-spec]) */
+	VONLINE		= 107,	/* volume is already online */
+	VDISKFULL	= 108,	/* disk partition is full */
+	VOVERQUOTA	= 109,	/* volume's maximum quota exceeded */
+	VBUSY		= 110,	/* volume is temporarily unavailable */
+	VMOVED		= 111,	/* volume moved to new server - ask this FS where */
+} afs_rxfs_abort_t;
+
+extern int afs_abort_to_error(int abortcode);
+
+#endif /* _H_DB712916_5113_11D6_9A6D_0002B3163499 */
diff --git a/fs/afs/file.c b/fs/afs/file.c
new file mode 100644
index 000000000000..d14e427b5784
--- /dev/null
+++ b/fs/afs/file.c
@@ -0,0 +1,143 @@
+/* file.c: AFS filesystem file handling
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "vnode.h"
+#include <rxrpc/call.h>
+#include "internal.h"
+
+//static int afs_file_open(struct inode *inode, struct file *file);
+//static int afs_file_release(struct inode *inode, struct file *file);
+
+static int afs_file_readpage(struct file *file, struct page *page);
+
+//static ssize_t afs_file_read(struct file *file, char *buf, size_t size, loff_t *off);
+
+static ssize_t afs_file_write(struct file *file, const char *buf, size_t size, loff_t *off);
+
+struct inode_operations afs_file_inode_operations = {
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	.getattr	= afs_inode_getattr,
+#else
+	.revalidate	= afs_inode_revalidate,
+#endif
+};
+
+struct file_operations afs_file_file_operations = {
+//	.open		= afs_file_open,
+//	.release	= afs_file_release,
+	.read		= generic_file_read, //afs_file_read,
+	.write		= afs_file_write,
+	.mmap		= generic_file_mmap,
+//	.fsync		= afs_file_fsync,
+};
+
+struct address_space_operations afs_fs_aops = {
+	.readpage	= afs_file_readpage,
+};
+
+/*****************************************************************************/
+/*
+ * AFS file read
+ */
+#if 0
+static ssize_t afs_file_read(struct file *file, char *buf, size_t size, loff_t *off)
+{
+	struct afs_inode_info *ai;
+
+	ai = AFS_FS_I(file->f_dentry->d_inode);
+	if (ai->flags & AFS_INODE_DELETED)
+		return -ESTALE;
+
+	return -EIO;
+} /* end afs_file_read() */
+#endif
+
+/*****************************************************************************/
+/*
+ * AFS file write - never writes anything: returns -ESTALE for a deleted vnode, else -EIO
+ */
+static ssize_t afs_file_write(struct file *file, const char *buf, size_t size, loff_t *off)
+{
+	afs_vnode_t *vnode;
+
+	vnode = AFS_FS_I(file->f_dentry->d_inode);
+	if (vnode->flags & AFS_VNODE_DELETED)
+		return -ESTALE;
+
+	return -EIO;	/* buf, size and off are not used */
+} /* end afs_file_write() */
+
+/*****************************************************************************/
+/*
+ * AFS read page from file (or symlink); the page arrives locked and is unlocked on all paths
+ */
+static int afs_file_readpage(struct file *file, struct page *page)
+{
+	struct afs_rxfs_fetch_descriptor desc;
+	struct inode *inode;
+	afs_vnode_t *vnode;
+	int ret;
+
+	inode = page->mapping->host;
+	vnode = AFS_FS_I(inode);
+
+	_enter("{%lu},{%lu}",inode->i_ino,page->index);
+
+	if (!PageLocked(page))
+		PAGE_BUG(page);
+
+	ret = -ESTALE;
+	if (vnode->flags & AFS_VNODE_DELETED)
+		goto error;
+
+	/* work out how much to get and from where (widen before shifting to avoid wrap-around) */
+	desc.fid	= vnode->fid;
+	desc.offset	= (loff_t) page->index << PAGE_CACHE_SHIFT;
+	desc.size	= min((size_t)(inode->i_size - desc.offset),(size_t)PAGE_SIZE);
+	desc.buffer	= kmap(page);
+	clear_page(desc.buffer);
+
+	/* read the contents of the file from the server into the page */
+	ret = afs_vnode_fetch_data(vnode,&desc);
+	kunmap(page);
+	if (ret<0) {
+		if (ret==-ENOENT) {
+			_debug("got NOENT from server - marking file deleted and stale");
+			spin_lock(&vnode->lock);	/* as afs_d_revalidate() does for this flag */
+			vnode->flags |= AFS_VNODE_DELETED;
+			spin_unlock(&vnode->lock);
+			ret = -ESTALE;
+		}
+		goto error;
+	}
+
+	SetPageUptodate(page);
+	unlock_page(page);
+
+	_leave(" = 0");
+	return 0;
+
+ error:
+	SetPageError(page);
+	unlock_page(page);
+
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_file_readpage() */
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
new file mode 100644
index 000000000000..e4aabcb85d7d
--- /dev/null
+++ b/fs/afs/fsclient.c
@@ -0,0 +1,816 @@
+/* fsclient.c: AFS File Server client stubs
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include "fsclient.h"
+#include "cmservice.h"
+#include "vnode.h"
+#include "server.h"
+#include "errors.h"
+#include "internal.h"
+
+#define FSFETCHSTATUS		132	/* AFS Fetch file status */
+#define FSFETCHDATA		130	/* AFS Fetch file data */
+#define FSGIVEUPCALLBACKS	147	/* AFS Discard server callback promises */
+#define FSGETVOLUMEINFO		148	/* AFS Get root volume information */
+#define FSGETROOTVOLUME		151	/* AFS Get root volume name */
+#define FSLOOKUP		161	/* AFS lookup file in directory */
+
+/*****************************************************************************/
+/*
+ * map afs abort codes to/from Linux error codes
+ * - called with call->lock held
+ */
+static void afs_rxfs_aemap(struct rxrpc_call *call)
+{
+	switch (call->app_err_state) {
+	case RXRPC_ESTATE_LOCAL_ABORT:	/* we aborted: derive the abort code from our errno */
+		call->app_abort_code = -call->app_errno;
+		break;
+	case RXRPC_ESTATE_PEER_ABORT:	/* the peer aborted: translate its code into an errno */
+		call->app_errno = afs_abort_to_error(call->app_abort_code);
+		break;
+	default:			/* any other error state needs no translation here */
+		break;
+	}
+} /* end afs_rxfs_aemap() */
+
+/*****************************************************************************/
+/*
+ * get the root volume name from a fileserver
+ * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
+ */
+#if 0
+int afs_rxfs_get_root_volume(afs_server_t *server, char *buf, size_t *buflen)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[2];
+	size_t sent;
+	int ret;
+	u32 param[1];
+
+	kenter("%p,%p,%u",server,buf,*buflen);
+
+	/* get hold of the fileserver connection */
+	ret = afs_server_get_fsconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSGETROOTVOLUME;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters */
+	param[0] = htonl(FSGETROOTVOLUME);
+
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (call->app_call_state!=RXRPC_CSTATE_CLNT_RCV_REPLY ||
+		    signal_pending(current))
+			break;
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	ret = -EINTR;
+	if (signal_pending(current))
+		goto abort;
+
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_ERROR:
+		ret = call->app_errno;
+		kdebug("Got Error: %d",ret);
+		goto out_unwait;
+
+	case RXRPC_CSTATE_CLNT_GOT_REPLY:
+		/* read the reply */
+		kdebug("Got Reply: qty=%d",call->app_ready_qty);
+
+		ret = -EBADMSG;
+		if (call->app_ready_qty <= 4)
+			goto abort;
+
+		ret = rxrpc_call_read_data(call,NULL,call->app_ready_qty,0);
+		if (ret<0)
+			goto abort;
+
+#if 0
+		/* unmarshall the reply */
+		bp = buffer;
+		for (loop=0; loop<65; loop++)
+			entry->name[loop] = ntohl(*bp++);
+		entry->name[64] = 0;
+
+		entry->type = ntohl(*bp++);
+		entry->num_servers = ntohl(*bp++);
+
+		for (loop=0; loop<8; loop++)
+			entry->servers[loop].addr.s_addr = *bp++;
+
+		for (loop=0; loop<8; loop++)
+			entry->servers[loop].partition = ntohl(*bp++);
+
+		for (loop=0; loop<8; loop++)
+			entry->servers[loop].flags = ntohl(*bp++);
+
+		for (loop=0; loop<3; loop++)
+			entry->volume_ids[loop] = ntohl(*bp++);
+
+		entry->clone_id = ntohl(*bp++);
+		entry->flags = ntohl(*bp);
+#endif
+
+		/* success */
+		ret = 0;
+		goto out_unwait;
+
+	default:
+		BUG();
+	}
+
+ abort:
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_fsconn(server,conn);
+ out:
+	kleave("");
+	return ret;
+} /* end afs_rxfs_get_root_volume() */
+#endif
+
+/*****************************************************************************/
+/*
+ * get information about a volume
+ */
+#if 0
+int afs_rxfs_get_volume_info(afs_server_t *server,
+			     const char *name,
+			     afs_volume_info_t *vinfo)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[3];
+	size_t sent;
+	int ret;
+	u32 param[2], *bp, zero;
+
+	_enter("%p,%s,%p",server,name,vinfo);
+
+	/* get hold of the fileserver connection */
+	ret = afs_server_get_fsconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSGETVOLUMEINFO;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters */
+	piov[1].iov_len = strlen(name);
+	piov[1].iov_base = (char*)name;
+
+	zero = 0;
+	piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
+	piov[2].iov_base = &zero;
+
+	param[0] = htonl(FSGETVOLUMEINFO);
+	param[1] = htonl(piov[1].iov_len);
+
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,3,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive */
+	bp = rxrpc_call_alloc_scratch(call,64);
+
+	ret = rxrpc_call_read_data(call,bp,64,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0) {
+		if (ret==-ECONNABORTED) {
+			ret = call->app_errno;
+			goto out_unwait;
+		}
+		goto abort;
+	}
+
+	/* unmarshall the reply */
+	vinfo->vid = ntohl(*bp++);
+	vinfo->type = ntohl(*bp++);
+
+	vinfo->type_vids[0] = ntohl(*bp++);
+	vinfo->type_vids[1] = ntohl(*bp++);
+	vinfo->type_vids[2] = ntohl(*bp++);
+	vinfo->type_vids[3] = ntohl(*bp++);
+	vinfo->type_vids[4] = ntohl(*bp++);
+
+	vinfo->nservers = ntohl(*bp++);
+	vinfo->servers[0].addr.s_addr = *bp++;
+	vinfo->servers[1].addr.s_addr = *bp++;
+	vinfo->servers[2].addr.s_addr = *bp++;
+	vinfo->servers[3].addr.s_addr = *bp++;
+	vinfo->servers[4].addr.s_addr = *bp++;
+	vinfo->servers[5].addr.s_addr = *bp++;
+	vinfo->servers[6].addr.s_addr = *bp++;
+	vinfo->servers[7].addr.s_addr = *bp++;
+
+	ret = -EBADMSG;
+	if (vinfo->nservers>8)
+		goto abort;
+
+	/* success */
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_fsconn(server,conn);
+ out:
+	_leave("");
+	return ret;
+
+ abort:
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+
+} /* end afs_rxfs_get_volume_info() */
+#endif
+
+/*****************************************************************************/
+/*
+ * fetch the status information for a file
+ */
+int afs_rxfs_fetch_file_status(afs_server_t *server,
+			       afs_vnode_t *vnode,
+			       afs_volsync_t *volsync) /* volsync may be NULL */
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct afs_server_callslot callslot;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	size_t sent;
+	int ret;
+	u32 *bp;
+
+	_enter("%p,{%u,%u,%u}",server,vnode->fid.vid,vnode->fid.vnode,vnode->fid.unique);
+
+	/* get a call slot on the fileserver; it is handed back at out_put_conn */
+	ret = afs_server_request_callslot(server,&callslot);
+	if (ret<0)
+		goto out;
+
+	/* create a call through the slot's connection; it is put again at out_unwait */
+	ret = rxrpc_create_call(callslot.conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSFETCHSTATUS;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: opcode + 3-word FID = 4 words (16 bytes) */
+	bp = rxrpc_call_alloc_scratch(call,16);
+	bp[0] = htonl(FSFETCHSTATUS);
+	bp[1] = htonl(vnode->fid.vid);
+	bp[2] = htonl(vnode->fid.vnode);
+	bp[3] = htonl(vnode->fid.unique);
+
+	piov[0].iov_len = 16;
+	piov[0].iov_base = bp;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the whole reply: 21 status + 3 callback + 6 volsync words = 120 bytes */
+	bp = rxrpc_call_alloc_scratch(call,120);
+
+	ret = rxrpc_call_read_data(call,bp,120,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0) {
+		if (ret==-ECONNABORTED) { /* skip the local abort and report call->app_errno */
+			ret = call->app_errno;
+			goto out_unwait;
+		}
+		goto abort;
+	}
+
+	/* unmarshall the reply: the file status record comes first */
+	vnode->status.if_version	= ntohl(*bp++);
+	vnode->status.type		= ntohl(*bp++);
+	vnode->status.nlink		= ntohl(*bp++);
+	vnode->status.size		= ntohl(*bp++);
+	vnode->status.version		= ntohl(*bp++);
+	vnode->status.author		= ntohl(*bp++);
+	vnode->status.owner		= ntohl(*bp++);
+	vnode->status.caller_access	= ntohl(*bp++);
+	vnode->status.anon_access	= ntohl(*bp++);
+	vnode->status.mode		= ntohl(*bp++);
+	vnode->status.parent.vid	= vnode->fid.vid; /* not read from the reply */
+	vnode->status.parent.vnode	= ntohl(*bp++);
+	vnode->status.parent.unique	= ntohl(*bp++);
+	bp++; /* seg size */
+	vnode->status.mtime_client	= ntohl(*bp++);
+	vnode->status.mtime_server	= ntohl(*bp++);
+	bp++; /* group */
+	bp++; /* sync counter */
+	vnode->status.version		|= ((unsigned long long) ntohl(*bp++)) << 32; /* high word */
+	bp++; /* spare2 */
+	bp++; /* spare3 */
+	bp++; /* spare4 */
+
+	vnode->cb_version		= ntohl(*bp++);
+	vnode->cb_expiry		= ntohl(*bp++);
+	vnode->cb_type			= ntohl(*bp++);
+
+	if (volsync) { /* only the first volsync word is kept; the rest is stepped over */
+		volsync->creation	= ntohl(*bp++);
+		bp++; /* spare2 */
+		bp++; /* spare3 */
+		bp++; /* spare4 */
+		bp++; /* spare5 */
+		bp++; /* spare6 */
+	}
+
+	/* success */
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_callslot(server,&callslot);
+ out:
+	_leave("");
+	return ret; /* 0 on success, otherwise the error that stopped the call */
+
+ abort: /* abort the call with ret as the error, sleep uninterruptibly, then unwind */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+} /* end afs_rxfs_fetch_file_status() */
+
+/*****************************************************************************/
+/*
+ * fetch the contents of a file or directory
+ */
+int afs_rxfs_fetch_file_data(afs_server_t *server,
+			     afs_vnode_t *vnode,
+			     struct afs_rxfs_fetch_descriptor *desc,
+			     afs_volsync_t *volsync) /* volsync may be NULL */
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct afs_server_callslot callslot;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	size_t sent;
+	int ret;
+	u32 *bp;
+
+	_enter("%p,{fid={%u,%u,%u},sz=%u,of=%lu}",
+	       server,
+	       desc->fid.vid,
+	       desc->fid.vnode,
+	       desc->fid.unique,
+	       (unsigned) desc->size,		/* size_t: cast to match %u */
+	       (unsigned long) desc->offset);	/* off_t: cast to match %lu */
+
+	/* get a call slot on the fileserver; it is handed back at out_put_conn */
+	ret = afs_server_request_callslot(server,&callslot);
+	if (ret<0)
+		goto out;
+
+	/* create a call through the slot's connection; it is put again at out_unwait */
+	ret = rxrpc_create_call(callslot.conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSFETCHDATA;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: opcode + 3-word FID + offset + size = 24 bytes */
+	bp = rxrpc_call_alloc_scratch(call,24);
+	bp[0] = htonl(FSFETCHDATA);
+	bp[1] = htonl(desc->fid.vid);
+	bp[2] = htonl(desc->fid.vnode);
+	bp[3] = htonl(desc->fid.unique);
+	bp[4] = htonl(desc->offset);
+	bp[5] = htonl(desc->size);
+
+	piov[0].iov_len = 24;
+	piov[0].iov_base = bp;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the data count to arrive (one word, read into the request scratch) */
+	ret = rxrpc_call_read_data(call,bp,4,RXRPC_CALL_READ_BLOCK);
+	if (ret<0)
+		goto read_failed;
+
+	desc->actual = ntohl(bp[0]);
+	if (desc->actual!=desc->size) { /* anything but the size asked for is rejected */
+		ret = -EBADMSG;
+		goto abort;
+	}
+
+	/* call the app to read the actual data */
+	rxrpc_call_reset_scratch(call);
+
+	ret = rxrpc_call_read_data(call,desc->buffer,desc->actual,RXRPC_CALL_READ_BLOCK);
+	if (ret<0)
+		goto read_failed;
+
+	/* wait for the rest: 21 status + 3 callback + 6 volsync words = 120 bytes */
+	rxrpc_call_reset_scratch(call);
+	bp = rxrpc_call_alloc_scratch(call,120);
+
+	ret = rxrpc_call_read_data(call,bp,120,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0)
+		goto read_failed;
+
+	/* unmarshall the reply: the file status record comes first */
+	vnode->status.if_version	= ntohl(*bp++);
+	vnode->status.type		= ntohl(*bp++);
+	vnode->status.nlink		= ntohl(*bp++);
+	vnode->status.size		= ntohl(*bp++);
+	vnode->status.version		= ntohl(*bp++);
+	vnode->status.author		= ntohl(*bp++);
+	vnode->status.owner		= ntohl(*bp++);
+	vnode->status.caller_access	= ntohl(*bp++);
+	vnode->status.anon_access	= ntohl(*bp++);
+	vnode->status.mode		= ntohl(*bp++);
+	vnode->status.parent.vid	= desc->fid.vid; /* not read from the reply */
+	vnode->status.parent.vnode	= ntohl(*bp++);
+	vnode->status.parent.unique	= ntohl(*bp++);
+	bp++; /* seg size */
+	vnode->status.mtime_client	= ntohl(*bp++);
+	vnode->status.mtime_server	= ntohl(*bp++);
+	bp++; /* group */
+	bp++; /* sync counter */
+	vnode->status.version		|= ((unsigned long long) ntohl(*bp++)) << 32; /* high word */
+	bp++; /* spare2 */
+	bp++; /* spare3 */
+	bp++; /* spare4 */
+
+	vnode->cb_version		= ntohl(*bp++);
+	vnode->cb_expiry		= ntohl(*bp++);
+	vnode->cb_type			= ntohl(*bp++);
+
+	if (volsync) { /* only the first volsync word is kept; the rest is stepped over */
+		volsync->creation	= ntohl(*bp++);
+		bp++; /* spare2 */
+		bp++; /* spare3 */
+		bp++; /* spare4 */
+		bp++; /* spare5 */
+		bp++; /* spare6 */
+	}
+
+	/* success */
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_callslot(server,&callslot);
+ out:
+	_leave(" = %d",ret);
+	return ret; /* 0 on success, otherwise the error that stopped the call */
+
+ read_failed: /* -ECONNABORTED: report call->app_errno without a local abort */
+	if (ret==-ECONNABORTED) {
+		ret = call->app_errno;
+		goto out_unwait;
+	}
+
+ abort: /* abort the call with ret as the error, sleep uninterruptibly, then unwind */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+
+} /* end afs_rxfs_fetch_file_data() */
+
+/*****************************************************************************/
+/*
+ * ask the AFS fileserver to discard a callback request on a file
+ */
+int afs_rxfs_give_up_callback(afs_server_t *server, afs_vnode_t *vnode)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct afs_server_callslot callslot;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	size_t sent;
+	int ret;
+	u32 *bp;
+
+	_enter("%p,{%u,%u,%u}",server,vnode->fid.vid,vnode->fid.vnode,vnode->fid.unique);
+
+	/* get a call slot on the fileserver; it is handed back at out_put_conn */
+	ret = afs_server_request_callslot(server,&callslot);
+	if (ret<0)
+		goto out;
+
+	/* create a call through the slot's connection; it is put again at out_unwait */
+	ret = rxrpc_create_call(callslot.conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSGIVEUPCALLBACKS;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: opcode + (count, 3-word FID) + (count, 3-word callback) */
+	bp = rxrpc_call_alloc_scratch(call,(1+4+4)*4);
+
+	piov[0].iov_len = (1+4+4)*4;
+	piov[0].iov_base = bp; /* captured before bp is advanced by the stores below */
+
+	*bp++ = htonl(FSGIVEUPCALLBACKS);
+	*bp++ = htonl(1); /* one FID follows */
+	*bp++ = htonl(vnode->fid.vid);
+	*bp++ = htonl(vnode->fid.vnode);
+	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(1); /* one callback record follows */
+	*bp++ = htonl(vnode->cb_version);
+	*bp++ = htonl(vnode->cb_expiry);
+	*bp++ = htonl(vnode->cb_type);
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* sleep until the call leaves the CLNT_RCV_REPLY state or a signal is pending */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (call->app_call_state!=RXRPC_CSTATE_CLNT_RCV_REPLY ||
+		    signal_pending(current))
+			break;
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	ret = -EINTR; /* the error used for the abort if a signal cut the wait short */
+	if (signal_pending(current))
+		goto abort;
+
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_ERROR:
+		ret = call->app_errno;
+		goto out_unwait;
+
+	case RXRPC_CSTATE_CLNT_GOT_REPLY:
+		ret = 0;
+		goto out_unwait;
+
+	default: /* no other call state is handled at this point */
+		BUG();
+	}
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_callslot(server,&callslot);
+ out:
+	_leave("");
+	return ret; /* 0 on success, otherwise the error that stopped the call */
+
+ abort: /* abort the call with ret as the error, sleep uninterruptibly, then unwind */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+} /* end afs_rxfs_give_up_callback() */
+
+/*****************************************************************************/
+/*
+ * look a filename up in a directory
+ * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
+ */
+#if 0
+int afs_rxfs_lookup(afs_server_t *server,
+		    afs_vnode_t *dir,
+		    const char *filename,
+		    afs_vnode_t *vnode,
+		    afs_volsync_t *volsync)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[3];
+	size_t sent;
+	int ret;
+	u32 *bp, zero;
+
+	kenter("%p,{%u,%u,%u},%s",server,fid->vid,fid->vnode,fid->unique,filename);
+
+	/* get hold of the fileserver connection */
+	ret = afs_server_get_fsconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSLOOKUP;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters */
+	bp = rxrpc_call_alloc_scratch(call,20);
+
+	zero = 0;
+
+	piov[0].iov_len = 20;
+	piov[0].iov_base = bp;
+	piov[1].iov_len = strlen(filename);
+	piov[1].iov_base = (char*) filename;
+	piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
+	piov[2].iov_base = &zero;
+
+	*bp++ = htonl(FSLOOKUP);
+	*bp++ = htonl(dirfid->vid);
+	*bp++ = htonl(dirfid->vnode);
+	*bp++ = htonl(dirfid->unique);
+	*bp++ = htonl(piov[1].iov_len);
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,3,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive */
+	bp = rxrpc_call_alloc_scratch(call,220);
+
+	ret = rxrpc_call_read_data(call,bp,220,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0) {
+		if (ret==-ECONNABORTED) {
+			ret = call->app_errno;
+			goto out_unwait;
+		}
+		goto abort;
+	}
+
+	/* unmarshall the reply */
+	fid->vid		= ntohl(*bp++);
+	fid->vnode		= ntohl(*bp++);
+	fid->unique		= ntohl(*bp++);
+
+	vnode->status.if_version	= ntohl(*bp++);
+	vnode->status.type		= ntohl(*bp++);
+	vnode->status.nlink		= ntohl(*bp++);
+	vnode->status.size		= ntohl(*bp++);
+	vnode->status.version		= ntohl(*bp++);
+	vnode->status.author		= ntohl(*bp++);
+	vnode->status.owner		= ntohl(*bp++);
+	vnode->status.caller_access	= ntohl(*bp++);
+	vnode->status.anon_access	= ntohl(*bp++);
+	vnode->status.mode		= ntohl(*bp++);
+	vnode->status.parent.vid	= dirfid->vid;
+	vnode->status.parent.vnode	= ntohl(*bp++);
+	vnode->status.parent.unique	= ntohl(*bp++);
+	bp++; /* seg size */
+	vnode->status.mtime_client	= ntohl(*bp++);
+	vnode->status.mtime_server	= ntohl(*bp++);
+	bp++; /* group */
+	bp++; /* sync counter */
+	vnode->status.version		|= ((unsigned long long) ntohl(*bp++)) << 32;
+	bp++; /* spare2 */
+	bp++; /* spare3 */
+	bp++; /* spare4 */
+
+	dir->status.if_version		= ntohl(*bp++);
+	dir->status.type			= ntohl(*bp++);
+	dir->status.nlink		= ntohl(*bp++);
+	dir->status.size			= ntohl(*bp++);
+	dir->status.version		= ntohl(*bp++);
+	dir->status.author		= ntohl(*bp++);
+	dir->status.owner		= ntohl(*bp++);
+	dir->status.caller_access	= ntohl(*bp++);
+	dir->status.anon_access		= ntohl(*bp++);
+	dir->status.mode			= ntohl(*bp++);
+	dir->status.parent.vid		= dirfid->vid;
+	dir->status.parent.vnode		= ntohl(*bp++);
+	dir->status.parent.unique	= ntohl(*bp++);
+	bp++; /* seg size */
+	dir->status.mtime_client		= ntohl(*bp++);
+	dir->status.mtime_server		= ntohl(*bp++);
+	bp++; /* group */
+	bp++; /* sync counter */
+	dir->status.version		|= ((unsigned long long) ntohl(*bp++)) << 32;
+	bp++; /* spare2 */
+	bp++; /* spare3 */
+	bp++; /* spare4 */
+
+	callback->fid		= *fid;
+	callback->version	= ntohl(*bp++);
+	callback->expiry	= ntohl(*bp++);
+	callback->type		= ntohl(*bp++);
+
+	if (volsync) {
+		volsync->creation	= ntohl(*bp++);
+		bp++; /* spare2 */
+		bp++; /* spare3 */
+		bp++; /* spare4 */
+		bp++; /* spare5 */
+		bp++; /* spare6 */
+	}
+
+	/* success */
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_fsconn(server,conn);
+ out:
+	kleave("");
+	return ret;
+
+ abort:
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+} /* end afs_rxfs_lookup() */
+#endif
diff --git a/fs/afs/fsclient.h b/fs/afs/fsclient.h
new file mode 100644
index 000000000000..0931a5b1be8f
--- /dev/null
+++ b/fs/afs/fsclient.h
@@ -0,0 +1,53 @@
+/* fsclient.h: AFS File Server client stub declarations
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_FSCLIENT_H
+#define _LINUX_AFS_FSCLIENT_H
+
+#include "server.h"
+
+extern int afs_rxfs_get_volume_info(afs_server_t *server,
+				    const char *name,
+				    afs_volume_info_t *vinfo);
+
+extern int afs_rxfs_fetch_file_status(afs_server_t *server,
+				      afs_vnode_t *vnode,
+				      afs_volsync_t *volsync);
+
+struct afs_rxfs_fetch_descriptor {
+	afs_fid_t	fid;		/* file ID to fetch */
+	size_t		size;		/* total number of bytes to fetch */
+	off_t		offset;		/* offset in file to start from */
+	void		*buffer;	/* read buffer */
+	size_t		actual;		/* actual size sent back by server */
+};
+
+extern int afs_rxfs_fetch_file_data(afs_server_t *server,
+				    afs_vnode_t *vnode,
+				    struct afs_rxfs_fetch_descriptor *desc,
+				    afs_volsync_t *volsync);
+
+extern int afs_rxfs_give_up_callback(afs_server_t *server, afs_vnode_t *vnode);
+
+/* this doesn't appear to work in OpenAFS server */
+extern int afs_rxfs_lookup(afs_server_t *server,
+			   afs_vnode_t *dir,
+			   const char *filename,
+			   afs_vnode_t *vnode,
+			   afs_volsync_t *volsync);
+
+/* this is apparently mis-implemented in OpenAFS server */
+extern int afs_rxfs_get_root_volume(afs_server_t *server,
+				    char *buf,
+				    size_t *buflen);
+
+
+#endif /* _LINUX_AFS_FSCLIENT_H */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
new file mode 100644
index 000000000000..235b7b0bcf5e
--- /dev/null
+++ b/fs/afs/inode.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
+ *          David Howells <dhowells@redhat.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "vnode.h"
+#include "super.h"
+#include "internal.h"
+
+struct afs_iget_data {
+	afs_fid_t		fid;
+	afs_volume_t		*volume;	/* volume on which resides */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+	afs_vnode_t		*new_vnode;	/* new vnode record */
+#endif
+};
+
+/*****************************************************************************/
+/*
+ * map the AFS file status to the inode member variables
+ */
+static int afs_inode_map_status(afs_vnode_t *vnode)
+{
+	struct inode *inode = AFS_VNODE_TO_I(vnode);
+
+	_debug("FS: ft=%d lk=%d sz=%u ver=%Lu mod=%hu",
+	       vnode->status.type,
+	       vnode->status.nlink,
+	       vnode->status.size,
+	       vnode->status.version,
+	       vnode->status.mode);
+
+	switch (vnode->status.type) { /* pick the mode bits and operation tables by file type */
+	case AFS_FTYPE_FILE:
+		inode->i_mode	= S_IFREG | vnode->status.mode;
+		inode->i_op	= &afs_file_inode_operations;
+		inode->i_fop	= &afs_file_file_operations;
+		break;
+	case AFS_FTYPE_DIR:
+		inode->i_mode	= S_IFDIR | vnode->status.mode;
+		inode->i_op	= &afs_dir_inode_operations;
+		inode->i_fop	= &afs_dir_file_operations;
+		break;
+	case AFS_FTYPE_SYMLINK: /* i_fop is not assigned for symlinks here */
+		inode->i_mode	= S_IFLNK | vnode->status.mode;
+		inode->i_op	= &page_symlink_inode_operations;
+		break;
+	default: /* the inode is left untouched for an unrecognised type */
+		printk("kAFS: AFS vnode with undefined type\n");
+		return -EBADMSG;
+	}
+
+	inode->i_nlink		= vnode->status.nlink;
+	inode->i_uid		= vnode->status.owner;
+	inode->i_gid		= 0; /* fixed value; not taken from the status record */
+	inode->i_rdev		= NODEV;
+	inode->i_size		= vnode->status.size;
+	inode->i_atime		= inode->i_mtime = inode->i_ctime = vnode->status.mtime_server;
+	inode->i_blksize	= PAGE_CACHE_SIZE;
+	inode->i_blocks		= 0;
+	inode->i_version	= vnode->fid.unique; /* the iget test functions compare this */
+	inode->i_mapping->a_ops	= &afs_fs_aops;
+
+	/* check to see whether a symbolic link is really a mountpoint */
+	if (vnode->status.type==AFS_FTYPE_SYMLINK) {
+		afs_mntpt_check_symlink(vnode); /* NOTE(review): int result ignored - confirm */
+
+		if (vnode->flags & AFS_VNODE_MOUNTPOINT) { /* present a mountpoint as a directory */
+			inode->i_mode	= S_IFDIR | vnode->status.mode;
+			inode->i_op	= &afs_mntpt_inode_operations;
+			inode->i_fop	= &afs_mntpt_file_operations;
+		}
+	}
+
+	return 0; /* 0 on success, -EBADMSG for an unrecognised file type */
+} /* end afs_inode_map_status() */
+
+/*****************************************************************************/
+/*
+ * attempt to fetch the status of an inode, coalescing multiple simultaneous fetches
+ */
+int afs_inode_fetch_status(struct inode *inode)
+{
+	afs_vnode_t *vn = AFS_FS_I(inode);
+	int rc;
+
+	/* fetch the vnode's status first; give up straight away if that fails */
+	rc = afs_vnode_fetch_status(vn);
+	if (rc!=0)
+		return rc;
+
+	/* then map the fetched status onto the inode's member variables */
+	rc = afs_inode_map_status(vn);
+
+	return rc;
+} /* end afs_inode_fetch_status() */
+
+/*****************************************************************************/
+/*
+ * iget5() comparator
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static int afs_iget5_test(struct inode *inode, void *opaque)
+{
+	const afs_fid_t *want = &((struct afs_iget_data *) opaque)->fid;
+
+	/* a match needs both fid.vnode (as i_ino) and fid.unique (as i_version) to agree */
+	return !(inode->i_ino!=want->vnode || inode->i_version!=want->unique);
+} /* end afs_iget5_test() */
+#endif
+
+/*****************************************************************************/
+/*
+ * iget5() inode initialiser
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static int afs_iget5_set(struct inode *inode, void *opaque)
+{
+	const struct afs_iget_data *wanted = opaque;
+	afs_vnode_t *vn = AFS_FS_I(inode);
+
+	/* stamp the vnode record, then mirror the FID into the inode's number and version */
+	vn->volume = wanted->volume;
+	vn->fid = wanted->fid;
+	inode->i_version = wanted->fid.unique;
+	inode->i_ino = wanted->fid.vnode;
+	return 0;
+} /* end afs_iget5_set() */
+#endif
+
+/*****************************************************************************/
+/*
+ * iget4() comparator
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+static int afs_iget4_test(struct inode *inode, ino_t ino, void *opaque)
+{
+	const afs_fid_t *want = &((struct afs_iget_data *) opaque)->fid;
+
+	/* a match needs both fid.vnode (as i_ino) and fid.unique (as i_version) to agree */
+	return !(inode->i_ino!=want->vnode || inode->i_version!=want->unique);
+} /* end afs_iget4_test() */
+#endif
+
+/*****************************************************************************/
+/*
+ * read an inode (2.4 only)
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+void afs_read_inode2(struct inode *inode, void *opaque)
+{
+	struct afs_iget_data *data = opaque;
+	afs_vnode_t *vnode;
+	int ret;
+
+	_enter(",{{%u,%u,%u},%p}",data->fid.vid,data->fid.vnode,data->fid.unique,data->volume);
+
+	if (inode->u.generic_ip) BUG();
+
+	/* attach a pre-allocated vnode record */
+	inode->u.generic_ip = vnode = data->new_vnode;
+	data->new_vnode = NULL;
+
+	memset(vnode,0,sizeof(*vnode));
+	vnode->inode = inode;
+	init_waitqueue_head(&vnode->update_waitq);
+	spin_lock_init(&vnode->lock);
+	INIT_LIST_HEAD(&vnode->cb_link);
+	INIT_LIST_HEAD(&vnode->cb_hash_link);
+	afs_timer_init(&vnode->cb_timeout,&afs_vnode_cb_timed_out_ops);
+	vnode->flags |= AFS_VNODE_CHANGED;
+	vnode->volume = data->volume;
+	vnode->fid = data->fid;
+
+	/* ask the server for a status check */
+	ret = afs_vnode_fetch_status(vnode);
+	if (ret<0) {
+		make_bad_inode(inode);
+		_leave(" [bad inode]");
+		return;
+	}
+
+	ret = afs_inode_map_status(vnode);
+	if (ret<0) {
+		make_bad_inode(inode);
+		_leave(" [bad inode]");
+		return;
+	}
+
+	_leave("");
+	return;
+} /* end afs_read_inode2() */
+#endif
+
+/*****************************************************************************/
+/*
+ * inode retrieval
+ */
+inline int afs_iget(struct super_block *sb, afs_fid_t *fid, struct inode **_inode)
+{
+	struct afs_iget_data data = { fid: *fid };
+	struct afs_super_info *as;
+	struct inode *inode;
+	afs_vnode_t *vnode;
+	int ret;
+
+	_enter(",{%u,%u,%u},,",fid->vid,fid->vnode,fid->unique);
+
+	as = sb->s_fs_info;
+	data.volume = as->volume;
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	inode = iget5_locked(sb,fid->vnode,afs_iget5_test,afs_iget5_set,&data);
+	if (!inode) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	vnode = AFS_FS_I(inode);
+
+	/* deal with an existing inode */
+	if (!(inode->i_state & I_NEW)) {
+		ret = afs_vnode_fetch_status(vnode);
+		if (ret==0)
+			*_inode = inode;
+		else
+			iput(inode);
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	/* okay... it's a new inode */
+	vnode->flags |= AFS_VNODE_CHANGED;
+	ret = afs_inode_fetch_status(inode);
+	if (ret<0)
+		goto bad_inode;
+
+#if 0
+	/* find a cache entry for it */
+	ret = afs_cache_lookup_vnode(as->volume,vnode);
+	if (ret<0)
+		goto bad_inode;
+#endif
+
+	/* success */
+	unlock_new_inode(inode);
+
+	*_inode = inode;
+	_leave(" = 0 [CB { v=%u x=%lu t=%u nix=%u }]",
+	       vnode->cb_version,
+	       vnode->cb_timeout.timo_jif,
+	       vnode->cb_type,
+	       vnode->nix
+	       );
+	return 0;
+
+	/* failure */
+ bad_inode:
+	make_bad_inode(inode);
+	unlock_new_inode(inode);
+	iput(inode);
+
+	_leave(" = %d [bad]",ret);
+	return ret;
+
+#else
+
+	/* pre-allocate a vnode record so that afs_read_inode2() doesn't have to return an inode
+	 * without one attached
+	 */
+	data.new_vnode = kmalloc(sizeof(afs_vnode_t),GFP_KERNEL);
+	if (!data.new_vnode) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	inode = iget4(sb,fid->vnode,afs_iget4_test,&data);
+	if (data.new_vnode) kfree(data.new_vnode); 
+	if (!inode) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	vnode = AFS_FS_I(inode);
+	*_inode = inode;
+	_leave(" = 0 [CB { v=%u x=%lu t=%u nix=%u }]",
+	       vnode->cb_version,
+	       vnode->cb_timeout.timo_jif,
+	       vnode->cb_type,
+	       vnode->nix
+	       );
+	return 0;
+#endif
+} /* end afs_iget() */
+
+/*****************************************************************************/
+/*
+ * read the attributes of an inode
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	struct inode *inode;
+	afs_vnode_t *vnode;
+	int ret;
+
+	inode = dentry->d_inode;
+
+	_enter("{ ino=%lu v=%lu }",inode->i_ino,inode->i_version);
+
+	vnode = AFS_FS_I(inode);
+
+	ret = afs_inode_fetch_status(inode);
+	if (ret==-ENOENT) {
+		_leave(" = %d [%d %p]",ret,atomic_read(&dentry->d_count),dentry->d_inode);
+		return ret;
+	}
+	else if (ret<0) {
+		make_bad_inode(inode);
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	/* transfer attributes from the inode structure to the stat structure */
+	generic_fillattr(inode,stat);
+
+	_leave(" = 0 CB { v=%u x=%u t=%u }",
+	       vnode->cb_version,
+	       vnode->cb_expiry,
+	       vnode->cb_type);
+
+	return 0;
+} /* end afs_inode_getattr() */
+#endif
+
+/*****************************************************************************/
+/*
+ * revalidate the inode
+ */
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)
+int afs_inode_revalidate(struct dentry *dentry)
+{
+	struct inode *inode;
+	afs_vnode_t *vnode;
+	int ret;
+
+	inode = dentry->d_inode;
+
+	_enter("{ ino=%lu v=%lu }",inode->i_ino,inode->i_version);
+
+	vnode = AFS_FS_I(inode);
+
+	ret = afs_inode_fetch_status(inode);
+	if (ret==-ENOENT) {
+		_leave(" = %d [%d %p]",ret,atomic_read(&dentry->d_count),dentry->d_inode);
+		return ret;
+	}
+	else if (ret<0) {
+		make_bad_inode(inode);
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	_leave(" = 0 CB { v=%u x=%u t=%u }",
+	       vnode->cb_version,
+	       vnode->cb_expiry,
+	       vnode->cb_type);
+
+	return 0;
+} /* end afs_inode_revalidate() */
+#endif
+
+/*****************************************************************************/
+/*
+ * clear an AFS inode
+ */
+void afs_clear_inode(struct inode *inode)
+{
+	afs_vnode_t *vnode;
+
+	vnode = AFS_FS_I(inode);
+
+	_enter("ino=%lu { vn=%08x v=%u x=%u t=%u }",
+	       inode->i_ino,
+	       vnode->fid.vnode,
+	       vnode->cb_version,
+	       vnode->cb_expiry,
+	       vnode->cb_type
+	       );
+
+	if (inode->i_ino!=vnode->fid.vnode) BUG();
+
+	afs_vnode_give_up_callback(vnode);
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)
+	if (inode->u.generic_ip) kfree(inode->u.generic_ip);
+#endif
+
+	_leave("");
+} /* end afs_clear_inode() */
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
new file mode 100644
index 000000000000..37f84bb11891
--- /dev/null
+++ b/fs/afs/internal.h
@@ -0,0 +1,127 @@
+/* internal.h: internal AFS stuff
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef AFS_INTERNAL_H
+#define AFS_INTERNAL_H
+
+#include <linux/version.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+
+/*
+ * debug tracing
+ */
+#define kenter(FMT,...)	printk("==> %s("FMT")\n",__FUNCTION__,##__VA_ARGS__)
+#define kleave(FMT,...)	printk("<== %s()"FMT"\n",__FUNCTION__,##__VA_ARGS__)
+#define kdebug(FMT,...)	printk(FMT"\n",##__VA_ARGS__)
+#define kproto(FMT,...)	printk("### "FMT"\n",##__VA_ARGS__)
+#define knet(FMT,...)	printk(FMT"\n",##__VA_ARGS__)
+
+#if 0
+#define _enter(FMT,...)	kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...)	kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...)	kdebug(FMT,##__VA_ARGS__)
+#define _proto(FMT,...)	kproto(FMT,##__VA_ARGS__)
+#define _net(FMT,...)	knet(FMT,##__VA_ARGS__)
+#else
+#define _enter(FMT,...)	do { } while(0)
+#define _leave(FMT,...)	do { } while(0)
+#define _debug(FMT,...)	do { } while(0)
+#define _proto(FMT,...)	do { } while(0)
+#define _net(FMT,...)	do { } while(0)
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)
+#define wait_on_page_locked wait_on_page
+#define PageUptodate Page_Uptodate
+
+static inline struct proc_dir_entry *PDE(const struct inode *inode)
+{
+	return (struct proc_dir_entry *)inode->u.generic_ip;
+}
+#endif
+
+static inline void afs_discard_my_signals(void)
+{
+	siginfo_t discarded;	/* dequeued signal details are simply thrown away */
+
+	while (signal_pending(current)) {
+		spin_lock_irq(&current->sig->siglock);
+		dequeue_signal(&current->blocked,&discarded);
+		spin_unlock_irq(&current->sig->siglock);
+	}
+}
+
+/*
+ * cell.c
+ */
+extern struct rw_semaphore afs_proc_cells_sem;
+extern struct list_head afs_proc_cells;
+
+/*
+ * dir.c
+ */
+extern struct inode_operations afs_dir_inode_operations;
+extern struct file_operations afs_dir_file_operations;
+
+/*
+ * file.c
+ */
+extern struct address_space_operations afs_fs_aops;
+extern struct inode_operations afs_file_inode_operations;
+extern struct file_operations afs_file_file_operations;
+
+/*
+ * inode.c
+ */
+extern int afs_iget(struct super_block *sb, afs_fid_t *fid, struct inode **_inode);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+extern int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
+#else
+extern void afs_read_inode2(struct inode *inode, void *opaque);
+extern int afs_inode_revalidate(struct dentry *dentry);
+#endif
+extern void afs_clear_inode(struct inode *inode);
+
+/*
+ * mntpt.c
+ */
+extern struct inode_operations afs_mntpt_inode_operations;
+extern struct file_operations afs_mntpt_file_operations;
+
+extern int afs_mntpt_check_symlink(afs_vnode_t *vnode);
+
+/*
+ * super.c
+ */
+extern int afs_fs_init(void);
+extern void afs_fs_exit(void);
+
+#define AFS_CB_HASH_COUNT (PAGE_SIZE/sizeof(struct list_head))
+
+extern struct list_head afs_cb_hash_tbl[];
+extern spinlock_t afs_cb_hash_lock;
+
+#define afs_cb_hash(SRV,FID) /* hash-table bucket for a (SRV, FID) pair; SRV cast full-width */ \
+	afs_cb_hash_tbl[((unsigned long)(SRV) + (FID)->vid + (FID)->vnode + (FID)->unique) % \
+			AFS_CB_HASH_COUNT]
+
+/*
+ * proc.c
+ */
+extern int afs_proc_init(void);
+extern void afs_proc_cleanup(void);
+extern int afs_proc_cell_setup(afs_cell_t *cell);
+extern void afs_proc_cell_remove(afs_cell_t *cell);
+
+#endif /* AFS_INTERNAL_H */
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
new file mode 100644
index 000000000000..2891e98be91d
--- /dev/null
+++ b/fs/afs/kafsasyncd.c
@@ -0,0 +1,260 @@
+/* kafsasyncd.c: AFS asynchronous operation daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * The AFS async daemon is used to do the following:
+ * - probe "dead" servers to see whether they've come back to life yet.
+ * - probe "live" servers that we haven't talked to for a while to see if they are better
+ *   candidates for serving than what we're currently using
+ * - poll volume location servers to keep up to date volume location lists
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include "cell.h"
+#include "server.h"
+#include "volume.h"
+#include "kafsasyncd.h"
+#include "kafstimod.h"
+#include <rxrpc/call.h>
+#include <asm/errno.h>
+#include "internal.h"
+
+static DECLARE_COMPLETION(kafsasyncd_alive);	/* completed by the daemon once it has started */
+static DECLARE_COMPLETION(kafsasyncd_dead);	/* completed by the daemon as it exits */
+static DECLARE_WAIT_QUEUE_HEAD(kafsasyncd_sleepq);	/* the daemon sleeps here between jobs */
+static struct task_struct *kafsasyncd_task;	/* daemon's task; NULL unless the daemon is running */
+static int kafsasyncd_die;			/* set to 1 to ask the daemon to exit */
+
+static int kafsasyncd(void *arg);
+
+static LIST_HEAD(kafsasyncd_async_attnq);	/* operations waiting for the daemon's attention */
+static LIST_HEAD(kafsasyncd_async_busyq);	/* operations begun, or currently being attended to */
+static spinlock_t kafsasyncd_async_lock = SPIN_LOCK_UNLOCKED;	/* guards both queues above */
+
+static void kafsasyncd_null_call_attn_func(struct rxrpc_call *call)
+{ /* deliberately empty: kafsasyncd() installs this on each call it is about to abort at shutdown */
+}
+
+static void kafsasyncd_null_call_error_func(struct rxrpc_call *call)
+{ /* deliberately empty: kafsasyncd() installs this on each call it is about to abort at shutdown */
+}
+
+/*****************************************************************************/
+/*
+ * start the async daemon, returning once the new thread has signalled that it is running
+ */
+int afs_kafsasyncd_start(void)
+{
+	int pid = kernel_thread(kafsasyncd,NULL,0);
+
+	/* only wait for the thread to announce itself if it was actually created */
+	if (pid>=0)
+		wait_for_completion(&kafsasyncd_alive);
+
+	/* a negative value is kernel_thread()'s error; otherwise its result is passed back as is */
+	return pid;
+
+} /* end afs_kafsasyncd_start() */
+
+/*****************************************************************************/
+/*
+ * stop the async daemon, returning only after the daemon thread has signalled its exit
+ */
+void afs_kafsasyncd_stop(void)
+{
+	/* get rid of my daemon: raise the die flag, kick it out of its sleep, then wait for it */
+	kafsasyncd_die = 1;
+	wake_up(&kafsasyncd_sleepq);
+	wait_for_completion(&kafsasyncd_dead);
+
+} /* end afs_kafsasyncd_stop() */
+
+/*****************************************************************************/
+/* async operation daemon
+ * - attends to queued operations one at a time until told to die, then aborts the remainder
+ */
+static int kafsasyncd(void *arg)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct list_head *_p;
+	int die;
+
+	kafsasyncd_task = current;
+
+	printk("kAFS: Started kafsasyncd %d\n",current->pid);
+	strcpy(current->comm,"kafsasyncd");
+
+	daemonize();
+
+	complete(&kafsasyncd_alive);
+
+	/* block every signal: the blocked set becomes the inverse of an empty mask */
+	spin_lock_irq(&current->sig->siglock);
+	siginitsetinv(&current->blocked,0);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,3)
+	recalc_sigpending();
+#else
+	recalc_sigpending(current);
+#endif
+	spin_unlock_irq(&current->sig->siglock);
+
+	/* loop around looking for things to attend to */
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&kafsasyncd_sleepq,&myself);
+
+		for (;;) {
+			if (!list_empty(&kafsasyncd_async_attnq) ||
+			    signal_pending(current) ||
+			    kafsasyncd_die)
+				break;
+
+			schedule();
+			set_current_state(TASK_INTERRUPTIBLE);
+		}
+
+		remove_wait_queue(&kafsasyncd_sleepq,&myself);
+		set_current_state(TASK_RUNNING);
+
+		/* discard pending signals */
+		afs_discard_my_signals();
+
+		die = kafsasyncd_die;	/* sampled once so this pass still attends to one op before exiting */
+
+		/* deal with the next asynchronous operation requiring attention */
+		if (!list_empty(&kafsasyncd_async_attnq)) {
+			struct afs_async_op *op;
+
+			_debug("@@@ Begin Asynchronous Operation");
+
+			op = NULL;
+			spin_lock(&kafsasyncd_async_lock);
+
+			/* recheck under the lock, then shift the front op onto the busy queue */
+			if (!list_empty(&kafsasyncd_async_attnq)) {
+				op = list_entry(kafsasyncd_async_attnq.next,afs_async_op_t,link);
+				list_del(&op->link);
+				list_add_tail(&op->link,&kafsasyncd_async_busyq);
+			}
+
+			spin_unlock(&kafsasyncd_async_lock);
+
+			_debug("@@@ Operation %p {%p}\n",op,op?op->ops:NULL);
+
+			if (op)
+				op->ops->attend(op);
+
+			_debug("@@@ End Asynchronous Operation");
+		}
+
+	} while(!die);
+
+	/* need to kill all outstanding asynchronous operations before exiting */
+	kafsasyncd_task = NULL;
+	spin_lock(&kafsasyncd_async_lock);
+
+	/* fold the busy queue into the attention queue, then reset the emptied busy head
+	 * (unlinking the stale head with list_del_init() would cross-link the spliced entries) */
+	list_splice(&kafsasyncd_async_busyq,&kafsasyncd_async_attnq);
+	INIT_LIST_HEAD(&kafsasyncd_async_busyq);
+
+	/* neutralise each call's handlers and take each op's waiter off its call's wait queue */
+	list_for_each(_p,&kafsasyncd_async_attnq) {
+		afs_async_op_t *op = list_entry(_p,afs_async_op_t,link);
+
+		op->call->app_attn_func = kafsasyncd_null_call_attn_func;
+		op->call->app_error_func = kafsasyncd_null_call_error_func;
+		remove_wait_queue(&op->call->waitq,&op->waiter);
+	}
+
+	spin_unlock(&kafsasyncd_async_lock);
+
+	/* abort all the operations, always taking whichever op is at the front of the queue */
+	while (!list_empty(&kafsasyncd_async_attnq)) {
+		afs_async_op_t *op = list_entry(kafsasyncd_async_attnq.next,afs_async_op_t,link);
+		list_del_init(&op->link);
+
+		rxrpc_call_abort(op->call,-EIO);
+		rxrpc_put_call(op->call);
+		op->call = NULL;
+
+		op->ops->discard(op);
+	}
+
+	/* and that's all */
+	_leave("");
+	complete_and_exit(&kafsasyncd_dead,0);
+
+} /* end kafsasyncd() */
+
+/*****************************************************************************/
+/*
+ * begin an operation
+ * - place operation on busy queue and aim its waiter at the daemon task
+ */
+void afs_kafsasyncd_begin_op(afs_async_op_t *op)
+{
+	_enter("");
+
+	spin_lock(&kafsasyncd_async_lock);
+
+	init_waitqueue_entry(&op->waiter,kafsasyncd_task);	/* task is NULL unless the daemon is running */
+
+	list_del(&op->link);	/* unlink from wherever it is now before requeueing */
+	list_add_tail(&op->link,&kafsasyncd_async_busyq);
+
+	spin_unlock(&kafsasyncd_async_lock);
+
+	_leave("");
+} /* end afs_kafsasyncd_begin_op() */
+
+/*****************************************************************************/
+/*
+ * request attention for an operation
+ * - move to attention queue and wake the daemon, which then calls the op's attend() method
+ */
+void afs_kafsasyncd_attend_op(afs_async_op_t *op)
+{
+	_enter("");
+
+	spin_lock(&kafsasyncd_async_lock);
+
+	list_del(&op->link);	/* unlink from wherever it is now before requeueing */
+	list_add_tail(&op->link,&kafsasyncd_async_attnq);
+
+	spin_unlock(&kafsasyncd_async_lock);
+
+	wake_up(&kafsasyncd_sleepq);	/* done after dropping the queue lock */
+
+	_leave("");
+} /* end afs_kafsasyncd_attend_op() */
+
+/*****************************************************************************/
+/*
+ * terminate an operation
+ * - remove from either queue, leaving the op's link reinitialised, and wake the daemon
+ */
+void afs_kafsasyncd_terminate_op(afs_async_op_t *op)
+{
+	_enter("");
+
+	spin_lock(&kafsasyncd_async_lock);
+
+	list_del_init(&op->link);	/* link ends up pointing at itself, i.e. on no queue */
+
+	spin_unlock(&kafsasyncd_async_lock);
+
+	wake_up(&kafsasyncd_sleepq);
+
+	_leave("");
+} /* end afs_kafsasyncd_terminate_op() */
diff --git a/fs/afs/kafsasyncd.h b/fs/afs/kafsasyncd.h
new file mode 100644
index 000000000000..6438c17833a1
--- /dev/null
+++ b/fs/afs/kafsasyncd.h
@@ -0,0 +1,49 @@
+/* kafsasyncd.h: AFS asynchronous operation daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_KAFSASYNCD_H
+#define _LINUX_AFS_KAFSASYNCD_H
+
+#include "types.h"
+
+struct afs_async_op_ops {
+	void (*attend)(afs_async_op_t *op);	/* called by kafsasyncd for an op taken off the attention queue */
+	void (*discard)(afs_async_op_t *op);	/* called by kafsasyncd at shutdown, after the op's call is aborted */
+};
+
+/*****************************************************************************/
+/*
+ * asynchronous operation record, as queued on kafsasyncd's busy and attention queues
+ */
+struct afs_async_op
+{
+	struct list_head		link;		/* link in busy or attention queue */
+	afs_server_t			*server;	/* server being contacted */
+	struct rxrpc_call		*call;		/* RxRPC call performing op */
+	wait_queue_t			waiter;		/* wait queue entry for the kafsasyncd task */
+	const struct afs_async_op_ops	*ops;		/* operations */
+};
+
+static inline void afs_async_op_init(afs_async_op_t *op, const struct afs_async_op_ops *ops)
+{
+	INIT_LIST_HEAD(&op->link);	/* self-linked, i.e. not on any queue yet */
+	op->call = NULL;		/* no RxRPC call attached yet */
+	op->ops = ops;			/* note: server and waiter are not initialised here */
+}
+
+extern int afs_kafsasyncd_start(void);
+extern void afs_kafsasyncd_stop(void);
+
+extern void afs_kafsasyncd_begin_op(afs_async_op_t *op);
+extern void afs_kafsasyncd_attend_op(afs_async_op_t *op);
+extern void afs_kafsasyncd_terminate_op(afs_async_op_t *op);
+
+#endif /* _LINUX_AFS_KAFSASYNCD_H */
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
new file mode 100644
index 000000000000..ccc1b4e6b94d
--- /dev/null
+++ b/fs/afs/kafstimod.c
@@ -0,0 +1,211 @@
+/* kafstimod.c: AFS timeout daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include "cell.h"
+#include "volume.h"
+#include "kafstimod.h"
+#include <asm/errno.h>
+#include "internal.h"
+
+static DECLARE_COMPLETION(kafstimod_alive);	/* completed by the daemon once it has started */
+static DECLARE_COMPLETION(kafstimod_dead);	/* completed by the daemon as it exits */
+static DECLARE_WAIT_QUEUE_HEAD(kafstimod_sleepq);	/* the daemon sleeps here between timeouts */
+static int kafstimod_die;			/* set to 1 to ask the daemon to exit */
+
+static LIST_HEAD(kafstimod_list);		/* queued timers, kept in order of expiry time */
+static spinlock_t kafstimod_lock = SPIN_LOCK_UNLOCKED;	/* guards kafstimod_list */
+
+static int kafstimod(void *arg);
+
+/*****************************************************************************/
+/*
+ * start the timeout daemon, returning once the new thread has signalled that it is running
+ */
+int afs_kafstimod_start(void)
+{
+	int pid = kernel_thread(kafstimod,NULL,0);
+
+	/* only wait for the thread to announce itself if it was actually created */
+	if (pid>=0)
+		wait_for_completion(&kafstimod_alive);
+
+	/* a negative value is kernel_thread()'s error; otherwise its result is passed back as is */
+	return pid;
+
+} /* end afs_kafstimod_start() */
+
+/*****************************************************************************/
+/*
+ * stop the timeout daemon, returning only after the daemon thread has signalled its exit
+ */
+void afs_kafstimod_stop(void)
+{
+	/* get rid of my daemon: raise the die flag, kick it out of its sleep, then wait for it */
+	kafstimod_die = 1;
+	wake_up(&kafstimod_sleepq);
+	wait_for_completion(&kafstimod_dead);
+
+} /* end afs_kafstimod_stop() */
+
+/*****************************************************************************/
+/* timeout processing daemon
+ * - sleeps until the front timer on kafstimod_list expires, then dequeues it and calls its handler
+ */
+static int kafstimod(void *arg)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	afs_timer_t *timer;
+
+	printk("kAFS: Started kafstimod %d\n",current->pid);
+	strcpy(current->comm,"kafstimod");
+
+	daemonize();
+
+	complete(&kafstimod_alive);
+
+	/* block every signal: the blocked set becomes the inverse of an empty mask */
+	spin_lock_irq(&current->sig->siglock);
+	siginitsetinv(&current->blocked,0);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,3)
+	recalc_sigpending();
+#else
+	recalc_sigpending(current);
+#endif
+	spin_unlock_irq(&current->sig->siglock);
+
+	/* loop around looking for things to attend to */
+ loop:
+	set_current_state(TASK_INTERRUPTIBLE);
+	add_wait_queue(&kafstimod_sleepq,&myself);
+
+	for (;;) {
+		unsigned long jif;
+		signed long timeout;
+
+		/* deal with the server being asked to die */
+		if (kafstimod_die) {
+			remove_wait_queue(&kafstimod_sleepq,&myself);
+			_leave("");
+			complete_and_exit(&kafstimod_dead,0);
+		}
+
+		/* discard pending signals */
+		afs_discard_my_signals();
+
+		/* work out the time to elapse before the next event */
+		spin_lock(&kafstimod_lock);
+		if (list_empty(&kafstimod_list)) {
+			timeout = MAX_SCHEDULE_TIMEOUT;
+		}
+		else {
+			timer = list_entry(kafstimod_list.next,afs_timer_t,link);
+			timeout = timer->timo_jif;
+			jif = jiffies;
+
+			if (time_before_eq(timeout,jif))
+				goto immediate;
+
+			else {	/* reuse the snapshot just tested so that the delay is always positive */
+				timeout = (long)timeout - (long)jif;
+			}
+		}
+		spin_unlock(&kafstimod_lock);
+
+		schedule_timeout(timeout);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	/* the thing on the front of the queue needs processing
+	 * - we come here with the lock held and timer pointing to the expired entry
+	 */
+ immediate:
+	remove_wait_queue(&kafstimod_sleepq,&myself);
+	set_current_state(TASK_RUNNING);
+
+	_debug("@@@ Begin Timeout of %p",timer);
+
+	/* dequeue the timer */
+	list_del_init(&timer->link);
+	spin_unlock(&kafstimod_lock);
+
+	/* call the timeout function, with the queue lock no longer held */
+	timer->ops->timed_out(timer);
+
+	_debug("@@@ End Timeout");
+	goto loop;
+
+} /* end kafstimod() */
+
+/*****************************************************************************/
+/*
+ * (re-)queue a timer to expire the given number of jiffies from now, and wake the daemon
+ */
+void afs_kafstimod_add_timer(afs_timer_t *timer, unsigned long timeout)
+{
+	struct list_head *_p;
+	afs_timer_t *ptimer;
+
+	_enter("%p,%lu",timer,timeout);
+
+	spin_lock(&kafstimod_lock);
+
+	list_del(&timer->link);
+
+	/* the timer was deferred or reset - put it back in the queue at the right place */
+	timer->timo_jif = jiffies + timeout;
+
+	list_for_each(_p,&kafstimod_list) {
+		ptimer = list_entry(_p,afs_timer_t,link);
+		if (time_before(timer->timo_jif,ptimer->timo_jif))
+			break;
+	}
+
+	list_add_tail(&timer->link,_p); /* insert before stopping point (the list head if none expires later) */
+
+	spin_unlock(&kafstimod_lock);
+
+	wake_up(&kafstimod_sleepq);
+
+	_leave("");
+} /* end afs_kafstimod_add_timer() */
+
+/*****************************************************************************/
+/*
+ * take a timer off the timeout queue
+ * - gives 0 if the timer was queued and has been removed, or -ENOENT if it was not queued
+ */
+int afs_kafstimod_del_timer(afs_timer_t *timer)
+{
+	int ret = -ENOENT;
+
+	_enter("%p",timer);
+
+	spin_lock(&kafstimod_lock);
+
+	/* a timer that isn't queued has a link that points back at itself */
+	if (!list_empty(&timer->link)) {
+		list_del_init(&timer->link);
+		ret = 0;
+	}
+	spin_unlock(&kafstimod_lock);
+
+	wake_up(&kafstimod_sleepq);
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_kafstimod_del_timer() */
diff --git a/fs/afs/kafstimod.h b/fs/afs/kafstimod.h
new file mode 100644
index 000000000000..342d81d6025b
--- /dev/null
+++ b/fs/afs/kafstimod.h
@@ -0,0 +1,45 @@
+/* kafstimod.h: AFS timeout daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_KAFSTIMOD_H
+#define _LINUX_AFS_KAFSTIMOD_H
+
+#include "types.h"
+
+struct afs_timer_ops {
+	/* called by kafstimod once the expired timer has been dequeued and the queue lock dropped */
+	void (*timed_out)(struct afs_timer *timer);
+};
+
+/*****************************************************************************/
+/*
+ * AFS timer/timeout record, as queued on kafstimod's expiry-ordered list
+ */
+struct afs_timer
+{
+	struct list_head		link;		/* link in timer queue */
+	unsigned long			timo_jif;	/* timeout time (absolute, in jiffies) */
+	const struct afs_timer_ops	*ops;		/* timeout expiry function */
+};
+
+static inline void afs_timer_init(afs_timer_t *timer, const struct afs_timer_ops *ops)
+{
+	INIT_LIST_HEAD(&timer->link);	/* self-linked, i.e. not queued */
+	timer->ops = ops;		/* note: timo_jif is not set here */
+}
+
+extern int afs_kafstimod_start(void);
+extern void afs_kafstimod_stop(void);
+
+extern void afs_kafstimod_add_timer(afs_timer_t *timer, unsigned long timeout);
+extern int afs_kafstimod_del_timer(afs_timer_t *timer);
+
+#endif /* _LINUX_AFS_KAFSTIMOD_H */
diff --git a/fs/afs/main.c b/fs/afs/main.c
new file mode 100644
index 000000000000..dc20f670a021
--- /dev/null
+++ b/fs/afs/main.c
@@ -0,0 +1,193 @@
+/* main.c: AFS client file system
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/call.h>
+#include <rxrpc/peer.h>
+#include "cell.h"
+#include "server.h"
+#include "fsclient.h"
+#include "cmservice.h"
+#include "kafstimod.h"
+#include "kafsasyncd.h"
+#include "internal.h"
+
+struct rxrpc_transport *afs_transport;	/* created by afs_init(), put again by afs_exit() */
+
+static int afs_init(void);
+static void afs_exit(void);
+static int afs_adding_peer(struct rxrpc_peer *peer);
+static void afs_discarding_peer(struct rxrpc_peer *peer);
+
+module_init(afs_init);
+module_exit(afs_exit);
+
+MODULE_DESCRIPTION("AFS Client File System");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+static struct rxrpc_peer_ops afs_peer_ops = {	/* attached to afs_transport by afs_init() */
+	.adding		= afs_adding_peer,
+	.discarding	= afs_discarding_peer,
+};
+
+struct list_head afs_cb_hash_tbl[AFS_CB_HASH_COUNT];	/* callback hash table; heads set up in afs_init() */
+spinlock_t afs_cb_hash_lock = SPIN_LOCK_UNLOCKED;
+
+/*****************************************************************************/
+/*
+ * initialise the AFS client FS module; on failure, undoes the earlier steps and returns the error
+ */
+static int afs_init(void)
+{
+	int loop, ret;
+
+	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
+
+	/* initialise the callback hash table */
+	spin_lock_init(&afs_cb_hash_lock);
+	for (loop=AFS_CB_HASH_COUNT-1; loop>=0; loop--)
+		INIT_LIST_HEAD(&afs_cb_hash_tbl[loop]);
+
+	/* register the /proc stuff */
+	ret = afs_proc_init();
+	if (ret<0)
+		return ret;	/* nothing to undo yet */
+
+	/* initialise the cell DB */
+	ret = afs_cell_init();
+	if (ret<0)
+		goto error;
+
+	/* start the timeout daemon */
+	ret = afs_kafstimod_start();
+	if (ret<0)
+		goto error;
+
+	/* start the async operation daemon */
+	ret = afs_kafsasyncd_start();
+	if (ret<0)
+		goto error_kafstimod;
+
+	/* create the RxRPC transport */
+	ret = rxrpc_create_transport(7001,&afs_transport);
+	if (ret<0)
+		goto error_kafsasyncd;
+
+	afs_transport->peer_ops = &afs_peer_ops;
+
+	/* register the filesystems */
+	ret = afs_fs_init();
+	if (ret<0)
+		goto error_transport;
+
+	return ret;
+
+ error_transport:	/* each label undoes one step and falls through to the earlier ones */
+	rxrpc_put_transport(afs_transport);
+ error_kafsasyncd:
+	afs_kafsasyncd_stop();
+ error_kafstimod:
+	afs_kafstimod_stop();
+ error:
+	afs_cell_purge();
+	afs_proc_cleanup();
+	printk(KERN_ERR "kAFS: failed to register: %d\n",ret);
+	return ret;
+} /* end afs_init() */
+
+/*****************************************************************************/
+/*
+ * clean up on module removal: unregister the filesystems, drop the transport, stop both daemons
+ */
+static void afs_exit(void)
+{
+	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
+
+	afs_fs_exit();
+	rxrpc_put_transport(afs_transport);
+	afs_kafstimod_stop();	/* NOTE(review): afs_init()'s error path stops kafsasyncd first - confirm order */
+	afs_kafsasyncd_stop();
+	afs_cell_purge();
+	afs_proc_cleanup();
+
+} /* end afs_exit() */
+
+/*****************************************************************************/
+/*
+ * notification that new peer record is being added
+ * - called from krxsecd
+ * - return an error to induce an abort
+ * - mustn't sleep (caller holds an rwlock)
+ */
+static int afs_adding_peer(struct rxrpc_peer *peer)
+{
+	afs_server_t *server;
+	int ret;
+
+	_debug("kAFS: Adding new peer %08x\n",ntohl(peer->addr.s_addr));
+
+	/* determine which server the peer resides in (if any) */
+	ret = afs_server_find_by_peer(peer,&server);
+	if (ret<0)
+		return ret; /* none that we recognise, so abort */
+
+	_debug("Server %p{u=%d}\n",server,atomic_read(&server->usage));
+
+	_debug("Cell %p{u=%d}\n",server->cell,atomic_read(&server->cell->usage));
+
+	/* cross-point the structs under a global lock */
+	spin_lock(&afs_server_peer_lock);
+	peer->user = server;
+	server->peer = peer;
+	spin_unlock(&afs_server_peer_lock);
+
+	afs_put_server(server);	/* presumably drops the ref from afs_server_find_by_peer() - TODO confirm */
+
+	return 0;
+} /* end afs_adding_peer() */
+
+/*****************************************************************************/
+/*
+ * notification that a peer record is being discarded
+ * - called from krxiod or krxsecd
+ */
+static void afs_discarding_peer(struct rxrpc_peer *peer)
+{
+	afs_server_t *server;
+
+	_enter("%p",peer);
+
+	_debug("Discarding peer %08x (rtt=%lu.%lumS)\n",
+	       ntohl(peer->addr.s_addr),
+	       peer->rtt/1000,
+	       peer->rtt%1000);
+
+	/* uncross-point the structs under a global lock */
+	spin_lock(&afs_server_peer_lock);
+	server = peer->user;
+	if (server) {	/* only peers that afs_adding_peer() tied to a server have one */
+		peer->user = NULL;
+		server->peer = NULL;
+
+		//_debug("Server %p{u=%d}\n",server,atomic_read(&server->usage));
+		//_debug("Cell %p{u=%d}\n",server->cell,atomic_read(&server->cell->usage));
+	}
+	spin_unlock(&afs_server_peer_lock);
+
+	_leave("");
+
+} /* end afs_discarding_peer() */
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
new file mode 100644
index 000000000000..e4fce66d76e0
--- /dev/null
+++ b/fs/afs/misc.c
@@ -0,0 +1,39 @@
+/* misc.c: miscellaneous bits
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include "errors.h"
+#include "internal.h"
+
+/*****************************************************************************/
+/*
+ * map an AFS abort code onto a negative Linux errno; anything unrecognised becomes -EIO
+ */
+int afs_abort_to_error(int abortcode)
+{
+	int err = -EIO;			/* VSALVAGE, VNOSERVICE and unknown codes */
+
+	switch (abortcode) {
+	case VNOVNODE: case VOFFLINE:	err = -ENOENT;	break;
+	case VNOVOL: case VMOVED:	err = -ENXIO;	break;
+	case VVOLEXISTS: case VONLINE:	err = -EEXIST;	break;
+	case VDISKFULL:			err = -ENOSPC;	break;
+	case VOVERQUOTA:		err = -EDQUOT;	break;
+	case VBUSY:			err = -EBUSY;	break;
+	case VSALVAGE: case VNOSERVICE:
+	default:			break;
+	}
+
+	return err;
+
+} /* end afs_abort_to_error() */
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
new file mode 100644
index 000000000000..4e88180f1c09
--- /dev/null
+++ b/fs/afs/mntpt.c
@@ -0,0 +1,112 @@
+/* mntpt.c: mountpoint management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "vnode.h"
+#include "internal.h"
+
+
+static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry);
+static int afs_mntpt_open(struct inode *inode, struct file *file);
+
+struct file_operations afs_mntpt_file_operations = {
+	.open		= afs_mntpt_open,	/* always fails with -EREMOTE */
+};
+
+struct inode_operations afs_mntpt_inode_operations = {
+	.lookup		= afs_mntpt_lookup,	/* always fails with -EREMOTE */
+	.readlink	= page_readlink,
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	.getattr	= afs_inode_getattr,	/* kernels after 2.5.0 */
+#else
+	.revalidate	= afs_inode_revalidate,	/* 2.5.0 and earlier */
+#endif
+};
+
+/*****************************************************************************/
+/*
+ * check a symbolic link to see whether it actually encodes a mountpoint
+ * - sets AFS_VNODE_MOUNTPOINT on the vnode if so (never clears it); 0 on success, else -ve error
+ */
+int afs_mntpt_check_symlink(afs_vnode_t *vnode)
+{
+	struct page *page;
+	size_t size;
+	char *buf;
+	int ret;
+
+	_enter("{%u,%u}",vnode->fid.vnode,vnode->fid.unique);
+
+	/* read the contents of the symlink into the pagecache */
+	page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping,0,
+			       (filler_t*)AFS_VNODE_TO_I(vnode)->i_mapping->a_ops->readpage,NULL);
+	if (IS_ERR(page)) {
+		ret = PTR_ERR(page);
+		goto out;
+	}
+
+	ret = -EIO;	/* reported if the page turns out not to be usable */
+	wait_on_page_locked(page);
+	buf = kmap(page);
+	if (!PageUptodate(page))
+		goto out_free;
+	if (PageError(page))
+		goto out_free;
+
+	/* examine the symlink's contents ('*' width and precision arguments must be of type int) */
+	size = vnode->status.size;
+	_debug("symlink to %*.*s",(int)size,(int)size,buf);
+
+	if (size>2 &&
+	    (buf[0]=='%' || buf[0]=='#') &&
+	    buf[size-1]=='.'
+	    ) {
+		_debug("symlink is a mountpoint");
+		spin_lock(&vnode->lock);
+		vnode->flags |= AFS_VNODE_MOUNTPOINT;
+		spin_unlock(&vnode->lock);
+	}
+
+	ret = 0;
+
+ out_free:
+	kunmap(page);
+	page_cache_release(page);
+ out:
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_mntpt_check_symlink() */
+
+/*****************************************************************************/
+/*
+ * no valid lookup procedure on this sort of dir - every lookup is refused with -EREMOTE
+ */
+static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry)
+{
+	return ERR_PTR(-EREMOTE);
+} /* end afs_mntpt_lookup() */
+
+/*****************************************************************************/
+/*
+ * no valid open procedure on this sort of dir - every open is refused with -EREMOTE
+ */
+static int afs_mntpt_open(struct inode *inode, struct file *file)
+{
+	return -EREMOTE;
+} /* end afs_mntpt_open() */
diff --git a/fs/afs/mount.h b/fs/afs/mount.h
new file mode 100644
index 000000000000..fbdd77878546
--- /dev/null
+++ b/fs/afs/mount.h
@@ -0,0 +1,23 @@
+/* mount.h: mount parameters
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_MOUNT_H
+#define _LINUX_AFS_MOUNT_H
+
+struct afs_mountdata {
+	const char		*volume;	/* name of volume */
+	const char		*cell;		/* name of cell containing volume */
+	const char		*cache;		/* name of cache block device */
+	size_t			nservers;	/* number of server addresses listed (presumably <= 10 - TODO confirm) */
+	u_int32_t		servers[10];	/* IP addresses of servers in this cell (room for 10) */
+};
+
+#endif /* _LINUX_AFS_MOUNT_H */
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
new file mode 100644
index 000000000000..83fda6decf57
--- /dev/null
+++ b/fs/afs/proc.c
@@ -0,0 +1,739 @@
+/* proc.c: /proc interface for AFS
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include "cell.h"
+#include "volume.h"
+#include <asm/uaccess.h>
+#include "internal.h"
+
+static struct proc_dir_entry *proc_afs;
+
+
+static int afs_proc_cells_open(struct inode *inode, struct file *file);
+static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos);
+static void afs_proc_cells_stop(struct seq_file *p, void *v);
+static int afs_proc_cells_show(struct seq_file *m, void *v);
+static ssize_t afs_proc_cells_write(struct file *file, const char *buf, size_t size, loff_t *_pos);
+
+static struct seq_operations afs_proc_cells_ops = {	/* iterator over the list of cells */
+	.start	= afs_proc_cells_start,
+	.next	= afs_proc_cells_next,
+	.stop	= afs_proc_cells_stop,
+	.show	= afs_proc_cells_show,
+};
+
+static struct file_operations afs_proc_cells_fops = {	/* installed on /proc/fs/afs/cells */
+	.open		= afs_proc_cells_open,
+	.read		= seq_read,
+	.write		= afs_proc_cells_write,	/* takes "add" commands */
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
+static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file);
+static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, loff_t *pos);
+static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v);
+static int afs_proc_cell_volumes_show(struct seq_file *m, void *v);
+
+static struct seq_operations afs_proc_cell_volumes_ops = {	/* iterator over a cell's vl_list */
+	.start	= afs_proc_cell_volumes_start,
+	.next	= afs_proc_cell_volumes_next,
+	.stop	= afs_proc_cell_volumes_stop,
+	.show	= afs_proc_cell_volumes_show,
+};
+
+static struct file_operations afs_proc_cell_volumes_fops = {	/* installed on /proc/fs/afs/<cell>/volumes */
+	.open		= afs_proc_cell_volumes_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= afs_proc_cell_volumes_release,
+};
+
+static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file);
+static int afs_proc_cell_vlservers_release(struct inode *inode, struct file *file);
+static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, loff_t *pos);
+static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v);
+static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v);
+
+static struct seq_operations afs_proc_cell_vlservers_ops = {	/* iterator over a cell's vl_addrs[] */
+	.start	= afs_proc_cell_vlservers_start,
+	.next	= afs_proc_cell_vlservers_next,
+	.stop	= afs_proc_cell_vlservers_stop,
+	.show	= afs_proc_cell_vlservers_show,
+};
+
+static struct file_operations afs_proc_cell_vlservers_fops = {	/* installed on /proc/fs/afs/<cell>/vlservers */
+	.open		= afs_proc_cell_vlservers_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= afs_proc_cell_vlservers_release,
+};
+
+static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
+static int afs_proc_cell_servers_release(struct inode *inode, struct file *file);
+static void *afs_proc_cell_servers_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, loff_t *pos);
+static void afs_proc_cell_servers_stop(struct seq_file *p, void *v);
+static int afs_proc_cell_servers_show(struct seq_file *m, void *v);
+
+static struct seq_operations afs_proc_cell_servers_ops = {	/* iterator over a cell's sv_list */
+	.start	= afs_proc_cell_servers_start,
+	.next	= afs_proc_cell_servers_next,
+	.stop	= afs_proc_cell_servers_stop,
+	.show	= afs_proc_cell_servers_show,
+};
+
+static struct file_operations afs_proc_cell_servers_fops = {	/* installed on /proc/fs/afs/<cell>/servers */
+	.open		= afs_proc_cell_servers_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= afs_proc_cell_servers_release,
+};
+
+/*****************************************************************************/
+/*
+ * create the /proc/fs/afs/ directory and the "cells" file inside it
+ * - returns 0 on success, or -ENOMEM if either entry could not be created
+ */
+int afs_proc_init(void)
+{
+	struct proc_dir_entry *cells;
+
+	_enter("");
+
+	proc_afs = proc_mkdir("fs/afs",NULL);
+	if (proc_afs == NULL)
+		goto error;
+	proc_afs->owner = THIS_MODULE;
+
+	cells = create_proc_entry("cells",0,proc_afs);
+	if (cells == NULL)
+		goto error_proc;
+	cells->proc_fops = &afs_proc_cells_fops;
+	cells->owner = THIS_MODULE;
+
+	_leave(" = 0");
+	return 0;
+#if 0
+ error_cells:
+	remove_proc_entry("cells",proc_afs);
+#endif
+ error_proc:
+	remove_proc_entry("fs/afs",NULL);
+ error:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+
+} /* end afs_proc_init() */
+
+/*****************************************************************************/
+/*
+ * clean up the /proc/fs/afs/ directory
+ */
+void afs_proc_cleanup(void)
+{
+	remove_proc_entry("cells",proc_afs);	/* the file inside the directory goes first */
+
+	remove_proc_entry("fs/afs",NULL);	/* then the directory itself */
+
+} /* end afs_proc_cleanup() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/fs/afs/cells", attaching the cells-list iterator to the file
+ * - the proc entry's data pointer is passed on as the seq_file's private data
+ */
+static int afs_proc_cells_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret = seq_open(file,&afs_proc_cells_ops);
+
+	if (ret>=0) {
+		seq = file->private_data;
+		seq->private = PDE(inode)->data;
+		ret = 0;
+	}
+
+	return ret;
+} /* end afs_proc_cells_open() */
+
+/*****************************************************************************/
+/*
+ * begin a read pass over the cells list: take the list lock and find the item at *_pos
+ * - position 0 is the header line, represented by the token (void *)1
+ */
+static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *item;
+	loff_t skip = *_pos;
+
+	/* hold off list modification; dropped again in afs_proc_cells_stop() */
+	down_read(&afs_proc_cells_sem);
+
+	if (skip == 0)
+		return (void *)1;
+
+	/* position N (N>0) corresponds to list element N-1 */
+	skip--;
+	list_for_each(item,&afs_proc_cells) {
+		if (skip-- == 0)
+			break;
+	}
+	return item==&afs_proc_cells ? NULL : item;
+} /* end afs_proc_cells_start() */
+
+/*****************************************************************************/
+/*
+ * step the iterator on to the item following v in the cells list
+ */
+static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *next;
+
+	++*pos;
+	if (v == (void *)1)	/* header token: the first cell follows it */
+		next = afs_proc_cells.next;
+	else
+		next = ((struct list_head *)v)->next;
+	return next==&afs_proc_cells ? NULL : next;
+} /* end afs_proc_cells_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading from the cells list: drops the lock taken in afs_proc_cells_start()
+ */
+static void afs_proc_cells_stop(struct seq_file *p, void *v)
+{
+	up_read(&afs_proc_cells_sem);
+
+} /* end afs_proc_cells_stop() */
+
+/*****************************************************************************/
+/*
+ * emit one line of /proc/fs/afs/cells: the column header for the (void *)1 token,
+ * otherwise the usage count and name of the cell that v is linked into
+ */
+static int afs_proc_cells_show(struct seq_file *m, void *v)
+{
+	afs_cell_t *cell;
+
+	if (v != (void *)1) {
+		/* an ordinary list item - v is the cell's proc_link member */
+		cell = list_entry(v,afs_cell_t,proc_link);
+		seq_printf(m,"%3d %s\n",atomic_read(&cell->usage),cell->name);
+	}
+	else {
+		seq_puts(m, "USE NAME\n");
+	}
+	return 0;
+} /* end afs_proc_cells_show() */
+
+/*****************************************************************************/
+/*
+ * handle writes to /proc/fs/afs/cells: returns size if the command was accepted, else an error
+ * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]*"
+ */
+static ssize_t afs_proc_cells_write(struct file *file, const char *buf, size_t size, loff_t *_pos)
+{
+	char *kbuf, *name, *args;
+	int ret;
+
+	/* start by dragging the command into memory */
+	if (size<=1 || size>=PAGE_SIZE)
+		return -EINVAL;
+
+	kbuf = kmalloc(size+1,GFP_KERNEL);	/* +1 leaves room for the terminating NUL */
+	if (!kbuf)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(kbuf,buf,size)!=0)
+		goto done;
+	kbuf[size] = 0;
+
+	/* trim to first NL */
+	name = memchr(kbuf,'\n',size);
+	if (name) *name = 0;
+
+	/* split into command, name and argslist (each separated by one or more spaces) */
+	name = strchr(kbuf,' ');
+	if (!name) goto inval;
+	do { *name++ = 0; } while(*name==' ');	/* terminate the command and skip the gap */
+	if (!*name) goto inval;
+
+	args = strchr(name,' ');
+	if (!args) goto inval;
+	do { *args++ = 0; } while(*args==' ');	/* terminate the name and skip the gap */
+	if (!*args) goto inval;
+
+	/* determine command to perform - "add" is the only one recognised */
+	_debug("cmd=%s name=%s args=%s",kbuf,name,args);
+
+	if (strcmp(kbuf,"add")==0) {
+		afs_cell_t *cell;
+		ret = afs_cell_create(name,args,&cell);
+		if (ret<0)
+			goto done;
+
+		printk("kAFS: Added new cell '%s'\n",name);
+	}
+	else {
+		goto inval;
+	}
+
+	ret = size;	/* report the whole write as consumed */
+
+ done:
+	kfree(kbuf);
+	_leave(" = %d",ret);
+	return ret;
+
+ inval:
+	ret = -EINVAL;
+	printk("kAFS: Invalid Command on /proc/fs/afs/cells file\n");
+	goto done;
+} /* end afs_proc_cells_write() */
+
+/*****************************************************************************/
+/*
+ * initialise /proc/fs/afs/<cell>/
+ * - makes the directory, then the "servers", "vlservers" and "volumes" files in it
+ * - returns 0 on success; if a file cannot be made, everything made so far is
+ *   removed again and -ENOMEM is returned
+ */
+int afs_proc_cell_setup(afs_cell_t *cell)
+{
+	static const struct {
+		const char		*name;
+		struct file_operations	*fops;
+	} files[] = {
+		{ "servers",	&afs_proc_cell_servers_fops	},
+		{ "vlservers",	&afs_proc_cell_vlservers_fops	},
+		{ "volumes",	&afs_proc_cell_volumes_fops	},
+	};
+	struct proc_dir_entry *entry;
+	int made;
+
+	_enter("%p{%s}",cell,cell->name);
+
+	cell->proc_dir = proc_mkdir(cell->name,proc_afs);
+	if (!cell->proc_dir)
+		return -ENOMEM;
+
+	for (made=0; made<3; made++) {
+		entry = create_proc_entry(files[made].name,0,cell->proc_dir);
+		if (!entry)
+			goto error;
+		entry->proc_fops = files[made].fops;
+		entry->owner = THIS_MODULE;
+		entry->data = cell;
+	}
+
+	_leave(" = 0");
+	return 0;
+
+	/* take down the files already made, newest first, then the directory */
+ error:
+	while (--made>=0)
+		remove_proc_entry(files[made].name,cell->proc_dir);
+	remove_proc_entry(cell->name,proc_afs);
+
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+} /* end afs_proc_cell_setup() */
+
+/*****************************************************************************/
+/*
+ * remove /proc/fs/afs/<cell>/ - the three files in it first, then the directory itself
+ */
+void afs_proc_cell_remove(afs_cell_t *cell)
+{
+	_enter("");
+
+	remove_proc_entry("volumes",cell->proc_dir);
+	remove_proc_entry("vlservers",cell->proc_dir);
+	remove_proc_entry("servers",cell->proc_dir);
+	remove_proc_entry(cell->name,proc_afs);	/* the directory is named after the cell */
+
+	_leave("");
+} /* end afs_proc_cell_remove() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/fs/afs/<cell>/volumes", holding a ref on the cell until the file is released
+ */
+static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	afs_cell_t *cell;
+	int ret;
+
+	cell = afs_get_cell_maybe((afs_cell_t**)&PDE(inode)->data);
+	if (!cell)
+		return -ENOENT;	/* no cell could be obtained from the proc entry */
+
+	ret = seq_open(file,&afs_proc_cell_volumes_ops);
+	if (ret<0) {
+		afs_put_cell(cell);	/* release will not be called, so drop the ref here */
+		return ret;
+	}
+	m = file->private_data;
+	m->private = cell;	/* the iterator functions find the cell here */
+	return 0;
+} /* end afs_proc_cell_volumes_open() */
+
+/*****************************************************************************/
+/*
+ * close the file and release the ref to the cell
+ * - returns whatever seq_release() returned
+ */
+static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file)
+{
+	afs_cell_t *cell = PDE(inode)->data;
+	int ret;
+
+	ret = seq_release(inode,file);
+	afs_put_cell(cell);
+	return ret;
+} /* end afs_proc_cell_volumes_release() */
+
+/*****************************************************************************/
+/*
+ * begin a read pass over the cell's volume location list: take the list lock and
+ * find the item at *_pos (position 0 being the header, returned as (void *)1)
+ */
+static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
+{
+	afs_cell_t *cell = m->private;
+	struct list_head *item;
+	loff_t skip = *_pos;
+
+	_enter("cell=%p pos=%Ld",cell,*_pos);
+
+	/* hold off list modification; dropped again in afs_proc_cell_volumes_stop() */
+	down_read(&cell->vl_sem);
+
+	if (skip == 0)
+		return (void *)1;
+
+	/* position N (N>0) corresponds to list element N-1 */
+	skip--;
+	list_for_each(item,&cell->vl_list) {
+		if (skip-- == 0)
+			break;
+	}
+	return item==&cell->vl_list ? NULL : item;
+} /* end afs_proc_cell_volumes_start() */
+
+/*****************************************************************************/
+/*
+ * step the iterator on to the item following v in the cell's volume location list
+ */
+static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, loff_t *_pos)
+{
+	afs_cell_t *cell = p->private;
+	struct list_head *next;
+
+	_enter("cell=%p pos=%Ld",cell,*_pos);
+
+	++*_pos;
+	if (v == (void *)1)	/* header token: the first record follows it */
+		next = cell->vl_list.next;
+	else
+		next = ((struct list_head *)v)->next;
+	return next==&cell->vl_list ? NULL : next;
+} /* end afs_proc_cell_volumes_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading the volumes list: drops the lock taken in afs_proc_cell_volumes_start()
+ */
+static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
+{
+	afs_cell_t *cell = p->private;
+
+	up_read(&cell->vl_sem);
+
+} /* end afs_proc_cell_volumes_stop() */
+
+/*****************************************************************************/
+/*
+ * emit one line of the volumes file: the column header for the (void *)1 token,
+ * otherwise the usage count, three volume IDs and name of one vlocation record
+ */
+static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
+{
+	afs_vlocation_t *vloc;
+
+	if (v == (void *)1) {
+		seq_puts(m, "USE VLID[0]  VLID[1]  VLID[2]  NAME\n");
+		return 0;
+	}
+
+	/* an ordinary list item - v is the record's link member */
+	vloc = list_entry(v,afs_vlocation_t,link);
+	seq_printf(m,"%3d %08x %08x %08x %s\n",
+		   atomic_read(&vloc->usage),
+		   vloc->vldb.vid[0],
+		   vloc->vldb.vid[1],
+		   vloc->vldb.vid[2],
+		   vloc->vldb.name);
+
+	return 0;
+} /* end afs_proc_cell_volumes_show() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/fs/afs/<cell>/vlservers", holding a ref on the cell until the file is released
+ */
+static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	afs_cell_t *cell;
+	int ret;
+
+	cell = afs_get_cell_maybe((afs_cell_t**)&PDE(inode)->data);
+	if (!cell)
+		return -ENOENT;	/* no cell could be obtained from the proc entry */
+
+	ret = seq_open(file,&afs_proc_cell_vlservers_ops);
+	if (ret<0) {
+		afs_put_cell(cell);	/* release will not be called, so drop the ref here */
+		return ret;
+	}
+	m = file->private_data;
+	m->private = cell;	/* the iterator functions find the cell here */
+	return 0;
+} /* end afs_proc_cell_vlservers_open() */
+
+/*****************************************************************************/
+/*
+ * close the file and release the ref to the cell
+ * - returns whatever seq_release() returned
+ */
+static int afs_proc_cell_vlservers_release(struct inode *inode, struct file *file)
+{
+	afs_cell_t *cell = PDE(inode)->data;
+	int ret;
+
+	ret = seq_release(inode,file);
+	afs_put_cell(cell);
+	return ret;
+} /* end afs_proc_cell_vlservers_release() */
+
+/*****************************************************************************/
+/*
+ * begin a read pass over the cell's VL server address array: take the lock and
+ * find the item at *_pos (position 0 being the header, returned as (void *)1)
+ */
+static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
+{
+	afs_cell_t *cell = m->private;
+	loff_t index = *_pos;
+
+	_enter("cell=%p pos=%Ld",cell,*_pos);
+
+	/* dropped again in afs_proc_cell_vlservers_stop() */
+	down_read(&cell->vl_sem);
+
+	if (index == 0)
+		return (void *)1;
+
+	/* position N (N>0) corresponds to vl_addrs[N-1] */
+	index--;
+	if (index < cell->vl_naddrs)
+		return &cell->vl_addrs[index];
+	return NULL;
+} /* end afs_proc_cell_vlservers_start() */
+
+/*****************************************************************************/
+/*
+ * step the iterator on to the next VL server address
+ * - the position before the increment doubles as the index of the next address
+ */
+static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, loff_t *_pos)
+{
+	afs_cell_t *cell = p->private;
+	loff_t index = *_pos;
+
+	_enter("cell=%p{nad=%u} pos=%Ld",cell,cell->vl_naddrs,*_pos);
+
+	*_pos = index + 1;
+	if (index < cell->vl_naddrs)
+		return &cell->vl_addrs[index];
+
+	return NULL;
+} /* end afs_proc_cell_vlservers_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading the address list: drops the lock taken in afs_proc_cell_vlservers_start()
+ */
+static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
+{
+	afs_cell_t *cell = p->private;
+
+	up_read(&cell->vl_sem);
+
+} /* end afs_proc_cell_vlservers_stop() */
+
+/*****************************************************************************/
+/*
+ * emit one line of the vlservers file: the column header for the (void *)1 token,
+ * otherwise the dotted-quad form of the address that v points at
+ */
+static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
+{
+	struct in_addr *addr;
+
+	if (v == (void *)1) {
+		seq_puts(m,"ADDRESS\n");
+		return 0;
+	}
+
+	addr = v;
+	seq_printf(m,"%u.%u.%u.%u\n",NIPQUAD(addr->s_addr));
+
+	return 0;
+} /* end afs_proc_cell_vlservers_show() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/fs/afs/<cell>/servers", holding a ref on the cell until the file is released
+ */
+static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	afs_cell_t *cell;
+	int ret;
+
+	cell = afs_get_cell_maybe((afs_cell_t**)&PDE(inode)->data);
+	if (!cell)
+		return -ENOENT;	/* no cell could be obtained from the proc entry */
+
+	ret = seq_open(file,&afs_proc_cell_servers_ops);
+	if (ret<0) {
+		afs_put_cell(cell);	/* release will not be called, so drop the ref here */
+		return ret;
+	}
+	m = file->private_data;
+	m->private = cell;	/* the iterator functions find the cell here */
+	return 0;
+} /* end afs_proc_cell_servers_open() */
+
+/*****************************************************************************/
+/*
+ * close the file and release the ref to the cell
+ * - returns whatever seq_release() returned
+ */
+static int afs_proc_cell_servers_release(struct inode *inode, struct file *file)
+{
+	afs_cell_t *cell = PDE(inode)->data;
+	int ret;
+
+	ret = seq_release(inode,file);
+	afs_put_cell(cell);
+	return ret;
+} /* end afs_proc_cell_servers_release() */
+
+/*****************************************************************************/
+/*
+ * begin a read pass over the cell's server list: take the list lock and
+ * find the item at *_pos (position 0 being the header, returned as (void *)1)
+ */
+static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
+{
+	afs_cell_t *cell = m->private;
+	struct list_head *item;
+	loff_t skip = *_pos;
+
+	_enter("cell=%p pos=%Ld",cell,*_pos);
+
+	/* hold off list modification; dropped again in afs_proc_cell_servers_stop() */
+	read_lock(&cell->sv_lock);
+
+	if (skip == 0)
+		return (void *)1;
+
+	/* position N (N>0) corresponds to list element N-1 */
+	skip--;
+	list_for_each(item,&cell->sv_list) {
+		if (skip-- == 0)
+			break;
+	}
+	return item==&cell->sv_list ? NULL : item;
+} /* end afs_proc_cell_servers_start() */
+
+/*****************************************************************************/
+/*
+ * step the iterator on to the item following v in the cell's server list
+ */
+static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, loff_t *_pos)
+{
+	afs_cell_t *cell = p->private;
+	struct list_head *next;
+
+	_enter("cell=%p pos=%Ld",cell,*_pos);
+
+	++*_pos;
+	if (v == (void *)1)	/* header token: the first server follows it */
+		next = cell->sv_list.next;
+	else
+		next = ((struct list_head *)v)->next;
+	return next==&cell->sv_list ? NULL : next;
+} /* end afs_proc_cell_servers_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading the servers list: drops the lock taken in afs_proc_cell_servers_start()
+ */
+static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
+{
+	afs_cell_t *cell = p->private;
+
+	read_unlock(&cell->sv_lock);
+
+} /* end afs_proc_cell_servers_stop() */
+
+/*****************************************************************************/
+/*
+ * emit one line of the servers file: the column header for the (void *)1 token,
+ * otherwise the usage count, address and fileserver state of one server record
+ */
+static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
+{
+	afs_server_t *server;
+	char ipaddr[20];
+
+	if (v == (void *)1) {
+		seq_puts(m, "USE ADDR            STATE\n");
+		return 0;
+	}
+
+	/* an ordinary list item - v is the server's link member */
+	server = list_entry(v,afs_server_t,link);
+
+	/* format the address separately so it can be padded as a single column */
+	sprintf(ipaddr,"%u.%u.%u.%u",NIPQUAD(server->addr));
+	seq_printf(m,"%3d %-15.15s %5d\n",
+		   atomic_read(&server->usage), ipaddr, server->fs_state);
+
+	return 0;
+} /* end afs_proc_cell_servers_show() */
diff --git a/fs/afs/server.c b/fs/afs/server.c
new file mode 100644
index 000000000000..b249d7cc3261
--- /dev/null
+++ b/fs/afs/server.c
@@ -0,0 +1,489 @@
+/* server.c: AFS server record management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/connection.h>
+#include "volume.h"
+#include "cell.h"
+#include "server.h"
+#include "transport.h"
+#include "vlclient.h"
+#include "kafstimod.h"
+#include "internal.h"
+
+spinlock_t afs_server_peer_lock = SPIN_LOCK_UNLOCKED;
+
+#define FS_SERVICE_ID		1	/* AFS file service ID (used for fileserver connections) */
+#define VL_SERVICE_ID		52	/* AFS Volume Location Service ID */
+
+static void __afs_server_timeout(afs_timer_t *timer)	/* .timed_out handler of afs_server_timer_ops */
+{
+	afs_server_t *server = list_entry(timer,afs_server_t,timeout);	/* timer is the record's timeout member */
+
+	_debug("SERVER TIMEOUT [%p{u=%d}]",server,atomic_read(&server->usage));
+
+	afs_server_do_timeout(server);
+}
+
+static const struct afs_timer_ops afs_server_timer_ops = {	/* attached to server->timeout in afs_server_lookup() */
+	.timed_out	= __afs_server_timeout,
+};
+
+/*****************************************************************************/
+/*
+ * look up the server record for an address in a cell, making or resurrecting one if needed
+ * - returns 0 with a referenced record passed back in *_server, or -ENOMEM
+ */
+int afs_server_lookup(afs_cell_t *cell, const struct in_addr *addr, afs_server_t **_server)
+{
+	struct list_head *_p;
+	afs_server_t *server, *active, *zombie;
+	int loop;
+
+	_enter("%p,%08x,",cell,ntohl(addr->s_addr));
+
+	/* allocate and initialise a candidate record up front; it is freed again if a match is found */
+	server = kmalloc(sizeof(afs_server_t),GFP_KERNEL);
+	if (!server) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	memset(server,0,sizeof(afs_server_t));
+	atomic_set(&server->usage,1);
+
+	INIT_LIST_HEAD(&server->link);
+	init_rwsem(&server->sem);
+	INIT_LIST_HEAD(&server->fs_callq);
+	spin_lock_init(&server->fs_lock);
+	INIT_LIST_HEAD(&server->cb_promises);
+	spin_lock_init(&server->cb_lock);
+
+	for (loop=0; loop<AFS_SERVER_CONN_LIST_SIZE; loop++)
+		server->fs_conn_cnt[loop] = 4;	/* every connection starts with 4 spare callslots */
+
+	memcpy(&server->addr,addr,sizeof(struct in_addr));
+	server->addr.s_addr = addr->s_addr;	/* NOTE(review): looks redundant after the memcpy above */
+
+	afs_timer_init(&server->timeout,&afs_server_timer_ops);
+
+	/* add to the cell */
+	write_lock(&cell->sv_lock);
+
+	/* check the active list */
+	list_for_each(_p,&cell->sv_list) {
+		active = list_entry(_p,afs_server_t,link);
+
+		if (active->addr.s_addr==addr->s_addr)
+			goto use_active_server;
+	}
+
+	/* check the inactive list */
+	spin_lock(&cell->sv_gylock);
+	list_for_each(_p,&cell->sv_graveyard) {
+		zombie = list_entry(_p,afs_server_t,link);
+
+		if (zombie->addr.s_addr==addr->s_addr)
+			goto resurrect_server;
+	}
+	spin_unlock(&cell->sv_gylock);
+
+	afs_get_cell(cell);	/* the new record holds a ref on its cell */
+	server->cell = cell;
+	list_add_tail(&server->link,&cell->sv_list);
+
+	write_unlock(&cell->sv_lock);
+
+	*_server = server;
+	_leave(" = 0 (%p)",server);
+	return 0;
+
+	/* found a matching active server */
+ use_active_server:
+	_debug("active server");
+	afs_get_server(active);
+	write_unlock(&cell->sv_lock);
+
+	kfree(server);
+
+	*_server = active;
+	_leave(" = 0 (%p)",active);
+	return 0;
+
+	/* found a matching server in the graveyard, so resurrect it and dispose of the new rec */
+ resurrect_server:
+	_debug("resurrecting server");
+
+	list_del(&zombie->link);
+	list_add_tail(&zombie->link,&cell->sv_list);
+	afs_get_server(zombie);
+	afs_kafstimod_del_timer(&zombie->timeout);
+	spin_unlock(&cell->sv_gylock);
+	write_unlock(&cell->sv_lock);
+
+	kfree(server);
+
+	*_server = zombie;
+	_leave(" = 0 (%p)",zombie);
+	return 0;
+
+} /* end afs_server_lookup() */
+
+/*****************************************************************************/
+/*
+ * drop a reference on a server record
+ * - on the last put the record moves from the cell's list to its graveyard for 10 seconds
+ */
+void afs_put_server(afs_server_t *server)
+{
+	afs_cell_t *cell;
+
+	_enter("%p",server);
+
+	cell = server->cell;
+
+	/* sanity check */
+	if (atomic_read(&server->usage)<=0)
+		BUG();
+
+	/* to prevent a race, the decrement and the dequeue must be effectively atomic */
+	write_lock(&cell->sv_lock);
+
+	if (likely(!atomic_dec_and_test(&server->usage))) {
+		write_unlock(&cell->sv_lock);
+		_leave("");
+		return;
+	}
+
+	spin_lock(&cell->sv_gylock);	/* taken inside sv_lock, as in afs_server_lookup() */
+	list_del(&server->link);
+	list_add_tail(&server->link,&cell->sv_graveyard);
+
+	/* time out in 10 secs */
+	afs_kafstimod_add_timer(&server->timeout,10*HZ);
+
+	spin_unlock(&cell->sv_gylock);
+	write_unlock(&cell->sv_lock);
+
+	_leave(" [killed]");
+} /* end afs_put_server() */
+
+/*****************************************************************************/
+/*
+ * timeout server record
+ * - removes from the cell's graveyard and destroys it if the usage count is still zero
+ */
+void afs_server_do_timeout(afs_server_t *server)
+{
+	struct rxrpc_peer *peer;
+	afs_cell_t *cell;
+	int loop;
+
+	_enter("%p",server);
+
+	cell = server->cell;
+
+	if (atomic_read(&server->usage)<0) BUG();
+
+	/* remove from graveyard if still dead - sv_gylock is the lock under which the list is edited elsewhere */
+	spin_lock(&cell->sv_gylock);
+	if (atomic_read(&server->usage)==0)
+		list_del_init(&server->link);
+	else
+		server = NULL;
+	spin_unlock(&cell->sv_gylock);
+
+	if (!server) {
+		_leave("");
+		return; /* resurrected */
+	}
+
+	/* we can now destroy it properly */
+	afs_put_cell(cell);
+
+	/* uncross-point the structs under a global lock */
+	spin_lock(&afs_server_peer_lock);
+	peer = server->peer;
+	if (peer) {
+		server->peer = NULL;
+		peer->user = NULL;
+	}
+	spin_unlock(&afs_server_peer_lock);
+
+	/* finish cleaning up the server - drop whichever connections were made */
+	for (loop=AFS_SERVER_CONN_LIST_SIZE-1; loop>=0; loop--)
+		if (server->fs_conn[loop])
+			rxrpc_put_connection(server->fs_conn[loop]);
+
+	if (server->vlserver)
+		rxrpc_put_connection(server->vlserver);
+
+	kfree(server);
+
+	_leave(" [destroyed]");
+} /* end afs_server_do_timeout() */
+
+/*****************************************************************************/
+/*
+ * get a callslot on a connection to the fileserver on the specified server
+ * - sleeps interruptibly if every callslot is in use
+ * - returns 0 with callslot->nconn and callslot->conn set, or an error (-EINTR on a signal)
+ */
+int afs_server_request_callslot(afs_server_t *server, struct afs_server_callslot *callslot)
+{
+	struct afs_server_callslot *pcallslot;
+	int nconn, ret;
+
+	_enter("%p,",server);
+
+	INIT_LIST_HEAD(&callslot->link);
+	callslot->task = current;
+	callslot->conn = NULL;
+	callslot->nconn = -1;
+	callslot->ready = 0;
+
+	ret = 0;
+
+	/* get hold of a callslot first */
+	spin_lock(&server->fs_lock);
+
+	/* resurrect the server if its death timeout has expired */
+	if (server->fs_state) {
+		if (time_before(jiffies,server->fs_dead_jif)) {
+			ret = server->fs_state;
+			spin_unlock(&server->fs_lock);
+			_leave(" = %d [still dead]",ret);
+			return ret;
+		}
+
+		server->fs_state = 0;
+	}
+
+	/* try and find a connection that has spare callslots */
+	for (nconn=0; nconn<AFS_SERVER_CONN_LIST_SIZE; nconn++) {
+		if (server->fs_conn_cnt[nconn]>0) {
+			server->fs_conn_cnt[nconn]--;
+			spin_unlock(&server->fs_lock);
+			callslot->nconn = nconn;
+			goto obtained_slot;
+		}
+	}
+
+	/* none were available - wait interruptibly for one to become available */
+	set_current_state(TASK_INTERRUPTIBLE);
+	list_add_tail(&callslot->link,&server->fs_callq);
+	spin_unlock(&server->fs_lock);
+
+	while (!callslot->ready && !signal_pending(current)) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	set_current_state(TASK_RUNNING);
+
+	/* even if we were interrupted we may still be queued */
+	if (!callslot->ready) {
+		spin_lock(&server->fs_lock);
+		list_del_init(&callslot->link);
+		spin_unlock(&server->fs_lock);
+	}
+
+	nconn = callslot->nconn;
+
+	/* if interrupted, we must release any slot we also got before returning an error */
+	if (signal_pending(current)) {
+		ret = -EINTR;
+		goto error_release;
+	}
+
+	/* if we were woken up with an error, then pass that error back to the caller */
+	if (nconn<0) {
+		_leave(" = %d",callslot->errno);
+		return callslot->errno;
+	}
+
+	/* were we given a connection directly? */
+	if (callslot->conn) {
+		/* yes - use it */
+		_leave(" = 0 (nc=%d)",nconn);
+		return 0;
+	}
+
+	/* got a callslot, but no connection */
+ obtained_slot:
+	/* need to get hold of the RxRPC connection */
+	down_write(&server->sem);
+
+	/* quick check to see if there's an outstanding error */
+	ret = server->fs_state;
+	if (ret)
+		goto error_release_upw;
+
+	if (server->fs_conn[nconn]) {
+		/* reuse an existing connection */
+		rxrpc_get_connection(server->fs_conn[nconn]);
+		callslot->conn = server->fs_conn[nconn];
+	}
+	else {
+		/* create a new connection */
+		ret = rxrpc_create_connection(afs_transport,
+					      htons(7000),
+					      server->addr.s_addr,
+					      FS_SERVICE_ID,
+					      NULL,
+					      &server->fs_conn[nconn]);
+		if (ret<0)
+			goto error_release_upw;
+
+		/* use the connection just made in this slot (which need not be slot 0) */
+		callslot->conn = server->fs_conn[nconn];
+		rxrpc_get_connection(callslot->conn);
+	}
+
+	up_write(&server->sem);
+
+	_leave(" = 0");
+	return 0;
+
+	/* handle an error occurring */
+ error_release_upw:
+	up_write(&server->sem);
+
+ error_release:
+	/* either release the callslot or pass it along to another deserving task */
+	spin_lock(&server->fs_lock);
+
+	if (nconn<0) {
+		/* no callslot allocated - nothing to hand back, just drop the lock */
+		spin_unlock(&server->fs_lock);
+	}
+	else if (list_empty(&server->fs_callq)) {
+		/* no one waiting */
+		server->fs_conn_cnt[nconn]++;
+		spin_unlock(&server->fs_lock);
+	}
+	else {
+		/* someone's waiting - dequeue them and wake them up */
+		pcallslot = list_entry(server->fs_callq.next,struct afs_server_callslot,link);
+		list_del_init(&pcallslot->link);
+
+		pcallslot->errno = server->fs_state;
+		if (!pcallslot->errno) {
+			/* pass them our callslot details */
+			callslot->conn = xchg(&pcallslot->conn,callslot->conn);
+			pcallslot->nconn = nconn;
+			callslot->nconn = nconn = -1;
+		}
+		pcallslot->ready = 1;
+		wake_up_process(pcallslot->task);
+		spin_unlock(&server->fs_lock);
+	}
+
+	if (callslot->conn) rxrpc_put_connection(callslot->conn);
+	callslot->conn = NULL;
+
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_server_request_callslot() */
+
+/*****************************************************************************/
+/*
+ * release a callslot back to the server
+ * - transfers the RxRPC connection to the next pending callslot if possible
+ */
+void afs_server_release_callslot(afs_server_t *server, struct afs_server_callslot *callslot)
+{
+	struct afs_server_callslot *pcallslot;
+
+	_enter("{ad=%08x,cnt=%u},{%d}",
+	       ntohl(server->addr.s_addr),
+	       server->fs_conn_cnt[callslot->nconn],
+	       callslot->nconn);
+
+	if (callslot->nconn<0) BUG();	/* NOTE(review): the trace above has already indexed fs_conn_cnt[] with nconn */
+
+	spin_lock(&server->fs_lock);
+
+	if (list_empty(&server->fs_callq)) {
+		/* no one waiting - count the slot back in as spare */
+		server->fs_conn_cnt[callslot->nconn]++;
+		spin_unlock(&server->fs_lock);
+	}
+	else {
+		/* someone's waiting - dequeue them and wake them up */
+		pcallslot = list_entry(server->fs_callq.next,struct afs_server_callslot,link);
+		list_del_init(&pcallslot->link);
+
+		pcallslot->errno = server->fs_state;
+		if (!pcallslot->errno) {
+			/* pass them our callslot details (swapping connection pointers with them) */
+			callslot->conn = xchg(&pcallslot->conn,callslot->conn);
+			pcallslot->nconn = callslot->nconn;
+			callslot->nconn = -1;
+		}
+		/* NOTE(review): if fs_state is set, the slot is neither handed on nor counted back in */
+		pcallslot->ready = 1;
+		wake_up_process(pcallslot->task);
+		spin_unlock(&server->fs_lock);
+	}
+
+	if (callslot->conn) rxrpc_put_connection(callslot->conn);
+
+	_leave("");
+} /* end afs_server_release_callslot() */
+
+/*****************************************************************************/
+/*
+ * get a handle to a connection to the vlserver (volume location) on the specified server
+ * - the connection is made on first use and then shared, each caller getting its own ref
+ */
+int afs_server_get_vlconn(afs_server_t *server, struct rxrpc_connection **_conn)
+{
+	struct rxrpc_connection *vlconn = NULL;
+	int ret = 0;
+
+	_enter("%p,",server);
+
+	/* the easy case: a connection already exists and can be shared */
+	down_read(&server->sem);
+	if (server->vlserver) {
+		vlconn = server->vlserver;
+		rxrpc_get_connection(vlconn);
+		up_read(&server->sem);
+		goto out;
+	}
+	up_read(&server->sem);
+
+	/* none yet: retake the lock for writing and check again, since someone else
+	 * may have made the connection whilst the lock was dropped */
+	down_write(&server->sem);
+	if (!server->vlserver)
+		ret = rxrpc_create_connection(afs_transport,
+					      htons(7003),
+					      server->addr.s_addr,
+					      VL_SERVICE_ID,
+					      NULL,
+					      &server->vlserver);
+	if (ret==0) {
+		vlconn = server->vlserver;
+		rxrpc_get_connection(vlconn);
+	}
+	up_write(&server->sem);
+
+ out:
+	/* pass back the referenced connection, or NULL if none could be made */
+	*_conn = vlconn;
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_server_get_vlconn() */
diff --git a/fs/afs/server.h b/fs/afs/server.h
new file mode 100644
index 000000000000..feddacf2c954
--- /dev/null
+++ b/fs/afs/server.h
@@ -0,0 +1,97 @@
+/* server.h: AFS server record
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_SERVER_H
+#define _LINUX_AFS_SERVER_H
+
+#include "types.h"
+#include "kafstimod.h"
+#include <rxrpc/peer.h>
+#include <linux/rwsem.h>
+
+extern spinlock_t afs_server_peer_lock;
+
+/*****************************************************************************/
+/*
+ * AFS server record
+ */
+struct afs_server
+{
+	atomic_t		usage;
+	afs_cell_t		*cell;		/* cell in which server resides */
+	struct list_head	link;		/* link in cell's server list */
+	struct rw_semaphore	sem;		/* access lock */
+	afs_timer_t		timeout;	/* graveyard timeout */
+	struct in_addr		addr;		/* server address */
+	struct rxrpc_peer	*peer;		/* peer record for this server */
+	struct rxrpc_connection	*vlserver;	/* connection to the volume location service */
+
+	/* file service access */
+#define AFS_SERVER_CONN_LIST_SIZE 2
+	struct rxrpc_connection	*fs_conn[AFS_SERVER_CONN_LIST_SIZE]; /* FS connections */
+	unsigned		fs_conn_cnt[AFS_SERVER_CONN_LIST_SIZE];	/* per conn call count */
+	struct list_head	fs_callq;	/* queue of processes waiting to make a call */
+	spinlock_t		fs_lock;	/* access lock */
+	int			fs_state;      	/* 0 or reason FS currently marked dead (-errno) */
+	unsigned		fs_rtt;		/* FS round trip time */
+	unsigned long		fs_act_jif;	/* time at which last activity occurred */
+	unsigned long		fs_dead_jif;	/* time at which no longer to be considered dead */
+
+	/* callback promise management */
+	struct list_head	cb_promises;	/* as yet unbroken promises from this server */
+	spinlock_t		cb_lock;	/* access lock */
+};
+
+extern int afs_server_lookup(afs_cell_t *cell, const struct in_addr *addr, afs_server_t **_server);
+
+#define afs_get_server(S) do { atomic_inc(&(S)->usage); } while(0)
+
+extern void afs_put_server(afs_server_t *server);
+extern void afs_server_do_timeout(afs_server_t *server);
+
+extern int afs_server_find_by_peer(const struct rxrpc_peer *peer, afs_server_t **_server);
+
+extern int afs_server_get_vlconn(afs_server_t *server, struct rxrpc_connection **_conn);
+
+static inline afs_server_t *afs_server_get_from_peer(struct rxrpc_peer *peer)
+{
+	afs_server_t *server;
+	/* peer->user is read, and the ref taken, while holding afs_server_peer_lock */
+	spin_lock(&afs_server_peer_lock);
+	server = peer->user;
+	if (server)
+		afs_get_server(server);
+	spin_unlock(&afs_server_peer_lock);
+
+	return server;	/* NULL (and no ref taken) if peer->user was NULL */
+}
+
+/*****************************************************************************/
+/*
+ * AFS server callslot grant record
+ */
+struct afs_server_callslot
+{
+	struct list_head	link;		/* link in server's fs_callq while waiting for a slot */
+	struct task_struct	*task;		/* process waiting to make call (woken by the releaser) */
+	struct rxrpc_connection	*conn;		/* connection to use (or NULL on error) */
+	short			nconn;		/* connection slot number, as used to index fs_conn_cnt[] (-1 on error) */
+	char			ready;		/* set to 1 by the releaser just before it wakes task */
+	int			errno;		/* copy of server->fs_state at hand-over: 0, or -errno if nconn==-1 */
+};
+
+extern int afs_server_request_callslot(afs_server_t *server,
+				       struct afs_server_callslot *callslot);
+
+extern void afs_server_release_callslot(afs_server_t *server,
+					struct afs_server_callslot *callslot);
+
+#endif /* _LINUX_AFS_SERVER_H */
diff --git a/fs/afs/super.c b/fs/afs/super.c
new file mode 100644
index 000000000000..18056534b504
--- /dev/null
+++ b/fs/afs/super.c
@@ -0,0 +1,595 @@
+/*
+ * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Howells <dhowells@redhat.com>
+ *          David Woodhouse <dwmw2@cambridge.redhat.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "vnode.h"
+#include "volume.h"
+#include "cell.h"
+#include "cmservice.h"
+#include "fsclient.h"
+#include "super.h"
+#include "internal.h"
+
+#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
+
+static inline char *strdup(const char *s)
+{
+	size_t size = strlen(s)+1;	/* bytes to copy, terminating NUL included */
+	char *copy = kmalloc(size,GFP_KERNEL);
+	if (!copy) return NULL;		/* out of memory: caller sees NULL */
+	return memcpy(copy,s,size);
+}
+
+static void afs_i_init_once(void *foo, kmem_cache_t *cachep, unsigned long flags);
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static struct super_block *afs_get_sb(struct file_system_type *fs_type,
+				      int flags, char *dev_name, void *data);
+#else
+static struct super_block *afs_read_super(struct super_block *sb, void *data, int);
+#endif
+
+static struct inode *afs_alloc_inode(struct super_block *sb);
+
+static void afs_put_super(struct super_block *sb);
+
+static void afs_destroy_inode(struct inode *inode);
+
+static struct file_system_type afs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "afs",
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	.get_sb		= afs_get_sb,
+	.kill_sb	= kill_anon_super,
+#else
+	.read_super	= afs_read_super,
+#endif
+};
+
+static struct super_operations afs_super_ops = {
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	.statfs		= simple_statfs,
+	.alloc_inode	= afs_alloc_inode,
+	.drop_inode	= generic_delete_inode,
+	.destroy_inode	= afs_destroy_inode,
+#else
+	.read_inode2	= afs_read_inode2,
+#endif
+	.clear_inode	= afs_clear_inode,
+	.put_super	= afs_put_super,
+};
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static kmem_cache_t *afs_inode_cachep;
+#endif
+
+/*****************************************************************************/
+/*
+ * initialise the filesystem
+ */
+int __init afs_fs_init(void)
+{
+	int ret;
+
+	kenter("");
+
+	/* open the cache (currently compiled out by the #if 0 below) */
+#if 0
+	ret = -EINVAL;
+	if (!cachedev) {
+		printk(KERN_NOTICE "kAFS: No cache device specified as module parm\n");
+		printk(KERN_NOTICE "kAFS: Set with \"cachedev=<devname>\" on insmod's cmdline\n");
+		return ret;
+	}
+
+	ret = afs_cache_open(cachedev,&afs_cache);
+	if (ret<0) {
+		printk(KERN_NOTICE "kAFS: Failed to open cache device\n");
+		return ret;
+	}
+#endif
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	/* create a slab cache of afs_vnode_t objects, constructed by afs_i_init_once() */
+	ret = -ENOMEM;
+	afs_inode_cachep = kmem_cache_create("afs_inode_cache",
+						sizeof(afs_vnode_t),
+						0,
+						SLAB_HWCACHE_ALIGN,
+						afs_i_init_once,
+						NULL);
+	if (!afs_inode_cachep) {
+		printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n");
+#if 0
+		afs_put_cache(afs_cache);
+#endif
+		return ret;	/* -ENOMEM; NOTE(review): no kleave() trace on this path */
+	}
+#endif
+
+	/* now export our filesystem to lesser mortals */
+	ret = register_filesystem(&afs_fs_type);
+	if (ret<0) {
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+		kmem_cache_destroy(afs_inode_cachep);	/* undo the cache creation above */
+#endif
+#if 0
+		afs_put_cache(afs_cache);
+#endif
+		kleave(" = %d",ret);
+		return ret;
+	}
+
+	kleave(" = 0");
+	return 0;
+} /* end afs_fs_init() */
+
+/*****************************************************************************/
+/*
+ * clean up the filesystem
+ */
+void __exit afs_fs_exit(void)
+{
+	/* undo afs_fs_init() in reverse: withdraw the fs type before freeing what it uses */
+	unregister_filesystem(&afs_fs_type);
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	kmem_cache_destroy(afs_inode_cachep);	/* our private inode cache */
+#endif
+
+#if 0
+	if (afs_cache)
+		afs_put_cache(afs_cache);
+#endif
+} /* end afs_fs_exit() */
+
+/*****************************************************************************/
+/*
+ * check that an argument has a value
+ */
+static int want_arg(char **_value, const char *option)
+{
+	if (_value && *_value && **_value)
+		return 1;	/* a non-empty value string is present */
+
+	printk(KERN_NOTICE "kAFS: %s: argument missing\n",option);
+	return 0;
+} /* end want_arg() */
+
+/*****************************************************************************/
+/*
+ * check that there is a value
+ */
+#if 0
+static int want_value(char **_value, const char *option)
+{
+	if (!_value || !*_value || !**_value) {
+		printk(KERN_NOTICE "kAFS: %s: argument incomplete\n",option);
+		return 0;
+	}
+	return 1;
+} /* end want_value() */
+#endif
+
+/*****************************************************************************/
+/*
+ * check that there's no subsequent value
+ */
+static int want_no_value(char *const *_value, const char *option)
+{
+	const char *extra = *_value;	/* the option's value string, if any */
+
+	if (!extra || !*extra) return 1;	/* absent or empty: acceptable */
+	printk(KERN_NOTICE "kAFS: %s: Invalid argument: %s\n",option,extra);
+	return 0;
+} /* end want_no_value() */
+
+/*****************************************************************************/
+/*
+ * extract a number from an option string value
+ */
+#if 0
+static int want_number(char **_value, const char *option, unsigned long *number,
+		       unsigned long limit)
+{
+	char *value = *_value;
+
+	if (!want_value(_value,option))
+		return 0;
+
+	*number = simple_strtoul(value,_value,0);
+
+	if (value==*_value) {
+		printk(KERN_NOTICE "kAFS: %s: Invalid number: %s\n",option,value);
+		return 0;
+	}
+
+	if (*number>limit) {
+		printk(KERN_NOTICE "kAFS: %s: numeric value %lu > %lu\n",option,*number,limit);
+		return 0;
+	}
+
+	return 1;
+} /* end want_number() */
+#endif
+
+/*****************************************************************************/
+/*
+ * extract a separator from an option string value
+ */
+#if 0
+static int want_sep(char **_value, const char *option, char sep)
+{
+	if (!want_value(_value,option))
+		return 0;
+	/* the character is consumed (pointer advanced) whether or not it matches */
+	if (*(*_value)++ != sep) {
+		printk(KERN_NOTICE "kAFS: %s: '%c' expected: %s\n",option,sep,*_value-1);
+		return 0;
+	}
+
+	return 1;
+} /* end want_sep() */
+#endif
+
+/*****************************************************************************/
+/*
+ * extract an IP address from an option string value
+ */
+#if 0
+static int want_ipaddr(char **_value, const char *option, struct in_addr *addr)
+{
+	unsigned long number[4];	/* the four dotted-quad components, each limited to 255 below */
+
+	if (!want_value(_value,option))
+		return 0;
+
+	if (!want_number(_value,option,&number[0],255) ||
+	    !want_sep(_value,option,'.') ||
+	    !want_number(_value,option,&number[1],255) ||
+	    !want_sep(_value,option,'.') ||
+	    !want_number(_value,option,&number[2],255) ||
+	    !want_sep(_value,option,'.') ||
+	    !want_number(_value,option,&number[3],255))
+		return 0;
+	/* store the octets in the order written, first octet at the lowest address */
+	((u8*)addr)[0] = number[0];
+	((u8*)addr)[1] = number[1];
+	((u8*)addr)[2] = number[2];
+	((u8*)addr)[3] = number[3];
+
+	return 1;
+} /* end want_ipaddr() */
+#endif
+
+/*****************************************************************************/
+/*
+ * parse the mount options
+ * - this function has been shamelessly adapted from the ext3 fs which shamelessly adapted it from
+ *   the msdos fs
+ */
+static int afs_super_parse_options(struct afs_super_info *as, char *options, char **devname)
+{
+	char *key, *value;
+	int ret;
+
+	_enter("%s",options);
+	/* live options: "rwpath" (flag, sets as->rwparent) and "vol=<name>" (replaces *devname) */
+	ret = 0;
+	while ((key = strsep(&options,",")))	/* splits the option string in place at each ',' */
+	{
+		value = strchr(key,'=');
+		if (value)
+			*value++ = 0;	/* cut "key=value" in two; value stays NULL when there is no '=' */
+
+		printk("kAFS: KEY: %s, VAL:%s\n",key,value?:"-");
+
+		if (strcmp(key,"rwpath")==0) {
+			if (!want_no_value(&value,"rwpath")) return -EINVAL;
+			as->rwparent = 1;
+			continue;
+		}
+		else if (strcmp(key,"vol")==0) {
+			if (!want_arg(&value,"vol")) return -EINVAL;
+			*devname = value;	/* points into the option string, not a copy */
+			continue;
+		}
+
+#if 0
+		if (strcmp(key,"servers")==0) {
+			if (!want_arg(&value,"servers")) return -EINVAL;
+
+			_debug("servers=%s",value);
+
+			for (;;) {
+				struct in_addr addr;
+
+				if (!want_ipaddr(&value,"servers",&addr))
+					return -EINVAL;
+
+				ret = afs_create_server(as->cell,&addr,&as->server);
+				if (ret<0) {
+					printk("kAFS: unable to create server: %d\n",ret);
+					return ret;
+				}
+
+				if (!*value)
+					break;
+
+				if (as->server) {
+					printk(KERN_NOTICE
+					       "kAFS: only one server can be specified\n");
+					return -EINVAL;
+				}
+
+				if (!want_sep(&value,"servers",':'))
+					return -EINVAL;
+			}
+			continue;
+		}
+#endif
+		/* anything not handled above is rejected */
+		printk("kAFS: Unknown mount option: '%s'\n",key);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	ret = 0;
+	/* NOTE(review): the early "return -EINVAL" paths above bypass this _leave() trace */
+ error:
+	_leave(" = %d",ret);
+
+	return ret;
+} /* end afs_super_parse_options() */
+
+/*****************************************************************************/
+/*
+ * fill in the superblock
+ */
+static int afs_fill_super(struct super_block *sb, void *_data, int silent)
+{
+	struct afs_super_info *as = NULL;
+	struct dentry *root = NULL;
+	struct inode *inode = NULL;
+	afs_fid_t fid;
+	void **data = _data;	/* { device name, option string } as packed by the callers */
+	char *options, *devname;
+	int ret;
+
+	_enter("");
+
+	if (!data) {
+		_leave(" = -EINVAL");
+		return -EINVAL;
+	}
+	devname = data[0];
+	options = data[1];
+	if (options)
+		options[PAGE_SIZE-1] = 0;	/* assumes a PAGE_SIZE option buffer; force termination */
+
+	/* allocate a superblock info record */
+	as = kmalloc(sizeof(struct afs_super_info),GFP_KERNEL);
+	if (!as) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	memset(as,0,sizeof(struct afs_super_info));
+
+	/* parse the options; a "vol=" option may replace devname */
+	if (options) {
+		ret = afs_super_parse_options(as,options,&devname);
+		if (ret<0)
+			goto error;
+	}
+	if (!devname) {	/* must hold even with no option string: data[0] can be NULL */
+		printk("kAFS: no volume name specified\n");
+		ret = -EINVAL;
+		goto error;
+	}
+
+	/* look the volume up by name; on success as->volume holds the result */
+	ret = afs_volume_lookup(devname,as->rwparent,&as->volume);
+	if (ret<0)
+		goto error;
+
+	/* fill in the superblock */
+	sb->s_blocksize		= PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits	= PAGE_CACHE_SHIFT;
+	sb->s_magic		= AFS_FS_MAGIC;
+	sb->s_op		= &afs_super_ops;
+	sb->s_fs_info		= as;
+
+	/* allocate the root inode and dentry: vnode 1, unique 1 of this volume */
+	fid.vid		= as->volume->vid;
+	fid.vnode	= 1;
+	fid.unique	= 1;
+	ret = afs_iget(sb,&fid,&inode);
+	if (ret<0)
+		goto error;
+
+	ret = -ENOMEM;
+	root = d_alloc_root(inode);
+	if (!root)
+		goto error;
+
+	sb->s_root = root;
+
+	_leave(" = 0");
+	return 0;
+
+ error:	/* release whatever was acquired; returns the -errno left in ret */
+	if (root) dput(root);
+	if (inode) iput(inode);
+	if (as) {
+		if (as->volume)		afs_put_volume(as->volume);
+		kfree(as);
+	}
+	sb->s_fs_info = NULL;	/* do not leave a pointer to the freed record behind */
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_fill_super() */
+
+/*****************************************************************************/
+/*
+ * get an AFS superblock
+ * - TODO: don't use get_sb_nodev(), but rather call sget() directly
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static struct super_block *afs_get_sb(struct file_system_type *fs_type,
+				      int flags,
+				      char *dev_name,
+				      void *options)
+{
+	struct super_block *sb;
+	void *data[2] = { dev_name, options };	/* unpacked by afs_fill_super() as data[0], data[1] */
+	int ret;
+
+	_enter(",,%s,%p",dev_name,options);
+
+	/* start the cache manager */
+	ret = afscm_start();
+	if (ret<0) {
+		_leave(" = %d",ret);
+		return ERR_PTR(ret);
+	}
+
+	/* allocate a deviceless superblock, with afs_fill_super() as its fill callback */
+	sb = get_sb_nodev(fs_type,flags,data,afs_fill_super);
+	if (IS_ERR(sb)) {
+		afscm_stop();	/* balance the afscm_start() above */
+		return sb;	/* an ERR_PTR value; NOTE(review): no _leave() trace on this path */
+	}
+
+	_leave("");
+	return sb;
+} /* end afs_get_sb() */
+#endif
+
+/*****************************************************************************/
+/*
+ * read an AFS superblock
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+static struct super_block *afs_read_super(struct super_block *sb, void *options, int silent)
+{
+	void *data[2] = { NULL, options };	/* no device name here: it can only come from a "vol=" option */
+	int ret;
+
+	_enter(",,%s",(char*)options);
+
+	/* start the cache manager */
+	ret = afscm_start();
+	if (ret<0) {
+		_leave(" = NULL (%d)",ret);
+		return NULL;
+	}
+
+	/* fill in the superblock we were handed (nothing is allocated here) */
+	ret = afs_fill_super(sb,data,silent);
+	if (ret<0) {
+		afscm_stop();	/* balance the afscm_start() above */
+		_leave(" = NULL (%d)",ret);
+		return NULL;
+	}
+
+	_leave(" = %p",sb);
+	return sb;
+} /* end afs_read_super() */
+#endif
+
+/*****************************************************************************/
+/*
+ * finish the unmounting process on the superblock
+ */
+static void afs_put_super(struct super_block *sb)
+{
+	struct afs_super_info *as = sb->s_fs_info;
+
+	_enter("");
+
+	if (as) {
+		if (as->volume)		afs_put_volume(as->volume);
+		kfree(as);		/* kmalloc'd by afs_fill_super(); not freed anywhere else in this file */
+		sb->s_fs_info = NULL;	/* do not leave a dangling pointer behind */
+	}
+	/* stop the cache manager (started by afs_get_sb/afs_read_super) */
+	afscm_stop();
+	_leave("");
+} /* end afs_put_super() */
+
+/*****************************************************************************/
+/*
+ * initialise an inode cache slab element prior to any use
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static void afs_i_init_once(void *_vnode, kmem_cache_t *cachep, unsigned long flags)
+{
+	afs_vnode_t *vnode = (afs_vnode_t *) _vnode;
+	/* act only when CONSTRUCTOR is set and VERIFY is not */
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) {
+		memset(vnode,0,sizeof(*vnode));	/* zero the whole object first, then set up its parts */
+		inode_init_once(&vnode->vfs_inode);
+		init_waitqueue_head(&vnode->update_waitq);
+		spin_lock_init(&vnode->lock);
+		INIT_LIST_HEAD(&vnode->cb_link);
+		INIT_LIST_HEAD(&vnode->cb_hash_link);
+		afs_timer_init(&vnode->cb_timeout,&afs_vnode_cb_timed_out_ops);
+	}
+
+} /* end afs_i_init_once() */
+#endif
+
+/*****************************************************************************/
+/*
+ * allocate an AFS inode struct from our slab cache
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static struct inode *afs_alloc_inode(struct super_block *sb)
+{
+	afs_vnode_t *vnode;
+
+	vnode = (afs_vnode_t *) kmem_cache_alloc(afs_inode_cachep,SLAB_KERNEL);
+	if (!vnode)
+		return NULL;
+	/* reset the per-use fields on every allocation; other fields are not touched here */
+	memset(&vnode->fid,0,sizeof(vnode->fid));
+	memset(&vnode->status,0,sizeof(vnode->status));
+
+	vnode->volume = NULL;
+	vnode->update_cnt = 0;
+	vnode->flags = 0;
+
+	return &vnode->vfs_inode;	/* hand back the inode embedded in the vnode */
+} /* end afs_alloc_inode() */
+#endif
+
+/*****************************************************************************/
+/*
+ * destroy an AFS inode struct
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static void afs_destroy_inode(struct inode *inode)
+{
+	_enter("{%lu}",inode->i_ino);
+	kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode));
+} /* end afs_destroy_inode() */
+#endif
diff --git a/fs/afs/super.h b/fs/afs/super.h
new file mode 100644
index 000000000000..b307b0884181
--- /dev/null
+++ b/fs/afs/super.h
@@ -0,0 +1,43 @@
+/* super.h: AFS filesystem internal private data
+ *
+ * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
+ *          David Howells <dhowells@redhat.com>
+ *
+ */
+
+#ifndef _LINUX_AFS_SUPER_H
+#define _LINUX_AFS_SUPER_H
+
+#include <linux/fs.h>
+#include "server.h"
+
+#ifdef __KERNEL__
+
+/*****************************************************************************/
+/*
+ * AFS superblock private data
+ * - there's one superblock per volume
+ */
+struct afs_super_info
+{
+	afs_volume_t		*volume;	/* volume record */
+	char			rwparent;	/* T if parent is R/W AFS volume */
+};
+
+static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_AFS_SUPER_H */
diff --git a/fs/afs/transport.h b/fs/afs/transport.h
new file mode 100644
index 000000000000..7013ae6ccc8c
--- /dev/null
+++ b/fs/afs/transport.h
@@ -0,0 +1,21 @@
+/* transport.h: AFS transport management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_TRANSPORT_H
+#define _LINUX_AFS_TRANSPORT_H
+
+#include "types.h"
+#include <rxrpc/transport.h>
+
+/* the cache manager transport endpoint */
+extern struct rxrpc_transport *afs_transport;
+
+#endif /* _LINUX_AFS_TRANSPORT_H */
diff --git a/fs/afs/types.h b/fs/afs/types.h
new file mode 100644
index 000000000000..411925f4fa04
--- /dev/null
+++ b/fs/afs/types.h
@@ -0,0 +1,152 @@
+/* types.h: AFS types
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_TYPES_H
+#define _LINUX_AFS_TYPES_H
+
+#ifdef __KERNEL__
+#include <rxrpc/types.h>
+#endif /* __KERNEL__ */
+
+typedef unsigned			afs_volid_t;
+typedef unsigned			afs_vnodeid_t;
+typedef unsigned long long		afs_dataversion_t;
+
+typedef struct afs_async_op		afs_async_op_t;
+typedef struct afs_callback		afs_callback_t;
+typedef struct afs_cell			afs_cell_t;
+typedef struct afs_fid			afs_fid_t;
+typedef struct afs_file_status		afs_file_status_t;
+typedef struct afs_server		afs_server_t;
+typedef struct afs_timer		afs_timer_t;
+typedef struct afs_vlocation		afs_vlocation_t;
+typedef struct afs_vnode		afs_vnode_t;
+typedef struct afs_volsync		afs_volsync_t;
+typedef struct afs_volume		afs_volume_t;
+typedef struct afs_volume_info		afs_volume_info_t;
+
+typedef struct afsc_cache		afsc_cache_t;
+typedef struct afsc_cache_cell		afsc_cache_cell_t;
+typedef struct afsc_cache_vldb		afsc_cache_vldb_t;
+typedef struct afsc_cell_record		afsc_cell_record_t;
+typedef struct afsc_inode		afsc_inode_t;
+typedef struct afsc_io			afsc_io_t;
+typedef struct afsc_io_subop		afsc_io_subop_t;
+typedef struct afsc_io_queue		afsc_io_queue_t;
+typedef struct afsc_super_block		afsc_super_block_t;
+typedef struct afsc_vldb_record		afsc_vldb_record_t;
+typedef struct afsc_vnode_catalogue	afsc_vnode_catalogue_t;
+typedef struct afsc_vnode_meta		afsc_vnode_meta_t;
+
+typedef struct afsvl_dbentry		afsvl_dbentry_t;
+
+typedef enum {
+	AFSVL_RWVOL,			/* read/write volume */
+	AFSVL_ROVOL,			/* read-only volume */
+	AFSVL_BACKVOL,			/* backup volume */
+} afs_voltype_t;
+
+extern const char *afs_voltypes[];
+
+typedef enum {
+	AFS_FTYPE_INVALID	= 0,
+	AFS_FTYPE_FILE		= 1,
+	AFS_FTYPE_DIR		= 2,
+	AFS_FTYPE_SYMLINK	= 3,
+} afs_file_type_t;
+
+#ifdef __KERNEL__
+
+/*****************************************************************************/
+/*
+ * AFS file identifier
+ */
+struct afs_fid
+{
+	afs_volid_t	vid;		/* volume ID */
+	afs_vnodeid_t	vnode;		/* file index within volume */
+	unsigned	unique;		/* unique ID number (file index version) */
+};
+
+/*****************************************************************************/
+/*
+ * AFS callback notification
+ */
+typedef enum {
+	AFSCM_CB_UNTYPED	= 0,	/* no type set on CB break */
+	AFSCM_CB_EXCLUSIVE	= 1,	/* CB exclusive to CM [not implemented] */
+	AFSCM_CB_SHARED		= 2,	/* CB shared by other CM's */
+	AFSCM_CB_DROPPED	= 3,	/* CB promise cancelled by file server */
+} afs_callback_type_t;
+
+struct afs_callback
+{
+	afs_server_t		*server;	/* server that made the promise */
+	afs_fid_t		fid;		/* file identifier */
+	unsigned		version;	/* callback version */
+	unsigned		expiry;		/* time at which expires */
+	afs_callback_type_t	type;		/* type of callback */
+};
+
+#define AFSCBMAX 50
+
+/*****************************************************************************/
+/*
+ * AFS volume information
+ */
+struct afs_volume_info
+{
+	afs_volid_t		vid;		/* volume ID */
+	afs_voltype_t		type;		/* type of this volume */
+	afs_volid_t		type_vids[5];	/* volume ID's for possible types for this vol */
+	
+	/* list of fileservers serving this volume */
+	size_t			nservers;	/* number of entries used in servers[] */
+	struct {
+		struct in_addr	addr;		/* fileserver address */
+	} servers[8];
+};
+
+/*****************************************************************************/
+/*
+ * AFS file status information
+ */
+struct afs_file_status
+{
+	unsigned		if_version;	/* interface version */
+#define AFS_FSTATUS_VERSION	1
+
+	afs_file_type_t		type;		/* file type */
+	unsigned		nlink;		/* link count */
+	size_t			size;		/* file size */
+	afs_dataversion_t	version;	/* current data version */
+	unsigned		author;		/* author ID */
+	unsigned		owner;		/* owner ID */
+	unsigned		caller_access;	/* access rights for authenticated caller */
+	unsigned		anon_access;	/* access rights for unauthenticated caller */
+	umode_t			mode;		/* UNIX mode */
+	afs_fid_t		parent;		/* parent file ID */
+	time_t			mtime_client;	/* last time client changed data */
+	time_t			mtime_server;	/* last time server changed data */
+};
+
+/*****************************************************************************/
+/*
+ * AFS volume synchronisation information
+ */
+struct afs_volsync
+{
+	time_t			creation;	/* volume creation time */
+};
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_AFS_TYPES_H */
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
new file mode 100644
index 000000000000..564e9939af40
--- /dev/null
+++ b/fs/afs/vlclient.c
@@ -0,0 +1,662 @@
+/* vlclient.c: AFS Volume Location Service client
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include "server.h"
+#include "volume.h"
+#include "vlclient.h"
+#include "kafsasyncd.h"
+#include "kafstimod.h"
+#include "errors.h"
+#include "internal.h"
+
+#define VLGETENTRYBYID		503	/* AFS Get Cache Entry By ID operation ID */
+#define VLGETENTRYBYNAME	504	/* AFS Get Cache Entry By Name operation ID */
+#define VLPROBE			514	/* AFS Probe Volume Location Service operation ID */
+
+static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call);
+static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call);
+
+/*****************************************************************************/
+/*
+ * map afs VL abort codes to/from Linux error codes
+ * - called with call->lock held
+ */
+static void afs_rxvl_aemap(struct rxrpc_call *call)
+{
+	int err;
+
+	_enter("{%u,%u,%d}",call->app_err_state,call->app_abort_code,call->app_errno);
+
+	switch (call->app_err_state) {
+	case RXRPC_ESTATE_LOCAL_ABORT:	/* errno -> abort code: the negated app_errno is used as is */
+		call->app_abort_code = -call->app_errno;
+		return;
+
+	case RXRPC_ESTATE_PEER_ABORT:	/* abort code -> errno: translate the peer's VL code */
+		switch (call->app_abort_code) {
+		case AFSVL_IDEXIST:		err = -EEXIST;		break;
+		case AFSVL_IO:			err = -EREMOTEIO;	break;
+		case AFSVL_NAMEEXIST:		err = -EEXIST;		break;
+		case AFSVL_CREATEFAIL:		err = -EREMOTEIO;	break;
+		case AFSVL_NOENT:		err = -ENOMEDIUM;	break;
+		case AFSVL_EMPTY:		err = -ENOMEDIUM;	break;
+		case AFSVL_ENTDELETED:		err = -ENOMEDIUM;	break;
+		case AFSVL_BADNAME:		err = -EINVAL;		break;
+		case AFSVL_BADINDEX:		err = -EINVAL;		break;
+		case AFSVL_BADVOLTYPE:		err = -EINVAL;		break;
+		case AFSVL_BADSERVER:		err = -EINVAL;		break;
+		case AFSVL_BADPARTITION:	err = -EINVAL;		break;
+		case AFSVL_REPSFULL:		err = -EFBIG;		break;
+		case AFSVL_NOREPSERVER:		err = -ENOENT;		break;
+		case AFSVL_DUPREPSERVER:	err = -EEXIST;		break;
+		case AFSVL_RWNOTFOUND:		err = -ENOENT;		break;
+		case AFSVL_BADREFCOUNT:		err = -EINVAL;		break;
+		case AFSVL_SIZEEXCEEDED:	err = -EINVAL;		break;
+		case AFSVL_BADENTRY:		err = -EINVAL;		break;
+		case AFSVL_BADVOLIDBUMP:	err = -EINVAL;		break;
+		case AFSVL_IDALREADYHASHED:	err = -EINVAL;		break;
+		case AFSVL_ENTRYLOCKED:		err = -EBUSY;		break;
+		case AFSVL_BADVOLOPER:		err = -EBADRQC;		break;
+		case AFSVL_BADRELLOCKTYPE:	err = -EINVAL;		break;
+		case AFSVL_RERELEASE:		err = -EREMOTEIO;	break;
+		case AFSVL_BADSERVERFLAG:	err = -EINVAL;		break;
+		case AFSVL_PERM:		err = -EACCES;		break;
+		case AFSVL_NOMEM:		err = -EREMOTEIO;	break;
+		default:	/* not listed above: defer to afs_abort_to_error() */
+			err = afs_abort_to_error(call->app_abort_code);
+			break;
+		}
+		call->app_errno = err;
+		return;
+
+	default:	/* any other error state: both fields are left untouched */
+		return;
+	}
+} /* end afs_rxvl_aemap() */
+
+/*****************************************************************************/
+/*
+ * probe a volume location server to see if it is still alive
+ */
+int afs_rxvl_probe(afs_server_t *server, int alloc_flags)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	size_t sent;
+	int ret;
+	u32 param[1];
+
+	/* get hold of the vlserver connection */
+	ret = afs_server_get_vlconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxvl_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = VLPROBE;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: the request consists of the opcode alone */
+	param[0] = htonl(VLPROBE);
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,alloc_flags,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* sleep until the call leaves the RCV_REPLY state or a signal is pending */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (call->app_call_state!=RXRPC_CSTATE_CLNT_RCV_REPLY ||
+		    signal_pending(current))
+			break;
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	ret = -EINTR;	/* a pending signal wins, whatever state the call reached */
+	if (signal_pending(current))
+		goto abort;
+
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_ERROR:
+		ret = call->app_errno;
+		goto out_unwait;
+
+	case RXRPC_CSTATE_CLNT_GOT_REPLY:
+		ret = 0;
+		goto out_unwait;
+
+	default:
+		BUG();
+	}
+
+ abort:	/* ret holds the -errno that is handed to rxrpc_call_abort() */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();	/* NOTE(review): presumably woken through call->waitq once the abort is processed — confirm */
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	rxrpc_put_connection(conn);
+ out:
+	return ret;
+
+} /* end afs_rxvl_probe() */
+
+/*****************************************************************************/
+/*
+ * look up a volume location database entry by name
+ */
+int afs_rxvl_get_entry_by_name(afs_server_t *server, const char *volname,
+			       afsc_vldb_record_t *entry)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[3];
+	unsigned tmp;
+	size_t sent;
+	int ret, loop;
+	u32 *bp, param[2], zero;
+
+	_enter(",%s,",volname);
+
+	memset(entry,0,sizeof(*entry));	/* the *mask fields below are only ever OR'd into */
+
+	/* get hold of the vlserver connection */
+	ret = afs_server_get_vlconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxvl_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = VLGETENTRYBYNAME;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: opcode, name length, name, zero padding */
+	piov[1].iov_len = strlen(volname);
+	piov[1].iov_base = (char*)volname;
+
+	zero = 0;
+	piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;	/* pad the name to a multiple of 4 bytes */
+	piov[2].iov_base = &zero;
+
+	param[0] = htonl(VLGETENTRYBYNAME);
+	param[1] = htonl(piov[1].iov_len);
+
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,3,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* read the fixed-size 384-byte (96-word) reply into the call's scratch space */
+	bp = rxrpc_call_alloc_scratch(call,384);	/* NOTE(review): result unchecked — confirm it cannot fail */
+
+	ret = rxrpc_call_read_data(call,bp,384,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0) {
+		if (ret==-ECONNABORTED) {
+			ret = call->app_errno;
+			goto out_unwait;
+		}
+		goto abort;
+	}
+
+	/* unmarshall the reply; the name arrives as one character per 32-bit word */
+	for (loop=0; loop<64; loop++)
+		entry->name[loop] = ntohl(*bp++);
+	bp++; /* final NUL */
+
+	bp++; /* type */
+	tmp = ntohl(*bp++); /* server count as claimed by the peer */
+	entry->nservers = tmp<8 ? tmp : 8; /* never report more than the 8 slots filled in below */
+	for (loop=0; loop<8; loop++)
+		entry->servers[loop].s_addr = *bp++;	/* copied as received, no byte swap */
+
+	bp += 8; /* partition IDs */
+
+	for (loop=0; loop<8; loop++) {
+		tmp = ntohl(*bp++);
+		if (tmp & AFS_VLSF_RWVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RW;
+		if (tmp & AFS_VLSF_ROVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RO;
+		if (tmp & AFS_VLSF_BACKVOL) entry->srvtmask[loop] |= AFSC_VOL_STM_BAK;
+	}
+
+	entry->vid[0] = ntohl(*bp++);
+	entry->vid[1] = ntohl(*bp++);
+	entry->vid[2] = ntohl(*bp++);
+
+	bp++; /* clone ID */
+
+	tmp = ntohl(*bp++); /* flags */
+	if (tmp & AFS_VLF_RWEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RW;
+	if (tmp & AFS_VLF_ROEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RO;
+	if (tmp & AFS_VLF_BACKEXISTS) entry->vidmask |= AFSC_VOL_STM_BAK;
+
+	ret = -ENOMEDIUM;	/* an entry with none of the three volume types is treated as absent */
+	if (!entry->vidmask)
+		goto abort;
+
+	/* success */
+	entry->ctime = xtime.tv_sec;
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	rxrpc_put_connection(conn);
+ out:
+	_leave(" = %d",ret);
+	return ret;
+
+ abort:	/* ret holds the -errno that is handed to rxrpc_call_abort() */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+} /* end afs_rxvl_get_entry_by_name() */
+
+/*****************************************************************************/
+/* look up a volume location database entry by ID (blocking VLGETENTRYBYID call)
+ * - zeroes *entry first; on success fills it from 384 bytes of reply and stamps entry->ctime
+ * - returns 0, -ENOMEDIUM if the reply flags no RW/RO/backup volume, or another negative error */
+int afs_rxvl_get_entry_by_id(afs_server_t *server,
+			     afs_volid_t volid,
+			     afs_voltype_t voltype,
+			     afsc_vldb_record_t *entry)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	unsigned tmp;
+	size_t sent;
+	int ret, loop;
+	u32 *bp, param[3];
+
+	_enter(",%x,%d,",volid,voltype);
+
+	memset(entry,0,sizeof(*entry));
+
+	/* get hold of the vlserver connection */
+	ret = afs_server_get_vlconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxvl_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = VLGETENTRYBYID;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters */
+	param[0] = htonl(VLGETENTRYBYID);	/* opcode, volume ID and volume type, all in network byte order */
+	param[1] = htonl(volid);
+	param[2] = htonl(voltype);
+
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive */
+	bp = rxrpc_call_alloc_scratch(call,384);	/* NOTE(review): result is used unchecked - confirm it cannot be NULL */
+
+	ret = rxrpc_call_read_data(call,bp,384,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0) {
+		if (ret==-ECONNABORTED) {
+			ret = call->app_errno;	/* call was aborted: report call->app_errno instead */
+			goto out_unwait;
+		}
+		goto abort;
+	}
+
+	/* unmarshall the reply */
+	for (loop=0; loop<64; loop++)	/* name is read as 64 32-bit words, one per character */
+		entry->name[loop] = ntohl(*bp++);
+	bp++; /* final NUL */
+
+	bp++; /* type */
+	entry->nservers = ntohl(*bp++);	/* NOTE(review): server-supplied count is not range-checked here */
+
+	for (loop=0; loop<8; loop++)
+		entry->servers[loop].s_addr = *bp++;	/* stored as received (no ntohl) */
+
+	bp += 8; /* partition IDs */
+
+	for (loop=0; loop<8; loop++) {	/* translate per-server AFS_VLSF_* flags into AFSC_VOL_STM_* bits */
+		tmp = ntohl(*bp++);
+		if (tmp & AFS_VLSF_RWVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RW;
+		if (tmp & AFS_VLSF_ROVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RO;
+		if (tmp & AFS_VLSF_BACKVOL) entry->srvtmask[loop] |= AFSC_VOL_STM_BAK;
+	}
+
+	entry->vid[0] = ntohl(*bp++);
+	entry->vid[1] = ntohl(*bp++);
+	entry->vid[2] = ntohl(*bp++);
+
+	bp++; /* clone ID */
+
+	tmp = ntohl(*bp++); /* flags */
+	if (tmp & AFS_VLF_RWEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RW;
+	if (tmp & AFS_VLF_ROEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RO;
+	if (tmp & AFS_VLF_BACKEXISTS) entry->vidmask |= AFSC_VOL_STM_BAK;
+
+	ret = -ENOMEDIUM;	/* no volume type flagged in the reply: abort with -ENOMEDIUM */
+	if (!entry->vidmask)
+		goto abort;
+
+#if 0 /* TODO: remove */
+	entry->nservers = 3;
+	entry->servers[0].s_addr = htonl(0xac101249);
+	entry->servers[1].s_addr = htonl(0xac101243);
+	entry->servers[2].s_addr = htonl(0xac10125b /*0xac10125b*/);
+
+	entry->srvtmask[0] = AFSC_VOL_STM_RO;
+	entry->srvtmask[1] = AFSC_VOL_STM_RO;
+	entry->srvtmask[2] = AFSC_VOL_STM_RO | AFSC_VOL_STM_RW;
+#endif
+
+	/* success */
+	entry->ctime = xtime.tv_sec;
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	rxrpc_put_connection(conn);
+ out:
+	_leave(" = %d",ret);
+	return ret;
+
+ abort:	/* abort the call with ret, sleep once, then run the common cleanup */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();	/* presumably woken through call->waitq once the abort is processed - TODO confirm */
+	goto out_unwait;
+} /* end afs_rxvl_get_entry_by_id() */
+
+/*****************************************************************************/
+/* look up a volume location database entry by ID asynchronously: creates op->call and sends the request
+ * - returns <0 only if the connection or the call cannot be obtained; once the call exists it returns 0
+ *   and any send/read failure is turned into a call abort for kafsasyncd to deal with */
+int afs_rxvl_get_entry_by_id_async(afs_async_op_t *op,
+				   afs_volid_t volid,
+				   afs_voltype_t voltype)
+{
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	size_t sent;
+	int ret;
+	u32 param[3];
+
+	_enter(",%x,%d,",volid,voltype);
+
+	/* get hold of the vlserver connection */
+	ret = afs_server_get_vlconn(op->server,&conn);
+	if (ret<0) {
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,
+				afs_rxvl_get_entry_by_id_attn,
+				afs_rxvl_get_entry_by_id_error,
+				afs_rxvl_aemap,
+				&op->call);
+	rxrpc_put_connection(conn);	/* our connection ref is dropped whether or not the call was created */
+
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	op->call->app_opcode = VLGETENTRYBYID;
+	op->call->app_user = op;	/* lets the attn/error handlers recover the op from the call */
+
+	call = op->call;
+	rxrpc_get_call(call);	/* extra ref held until the rxrpc_put_call() at "out" */
+
+	/* send event notifications from the call to kafsasyncd */
+	afs_kafsasyncd_begin_op(op);
+
+	/* marshall the parameters */
+	param[0] = htonl(VLGETENTRYBYID);
+	param[1] = htonl(volid);
+	param[2] = htonl(voltype);
+
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* allocate result read buffer in scratch space */
+	call->app_scr_ptr = rxrpc_call_alloc_scratch(op->call,384);	/* NOTE(review): result not checked here */
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0) {
+		rxrpc_call_abort(call,ret); /* handle from kafsasyncd */
+		ret = 0;
+		goto out;
+	}
+
+	/* wait for the reply to completely arrive */
+	ret = rxrpc_call_read_data(call,call->app_scr_ptr,384,0);	/* flags 0: no READ_BLOCK/READ_ALL, unlike the sync lookups */
+	switch (ret) {
+	case 0:
+	case -EAGAIN:
+	case -ECONNABORTED:
+		ret = 0;
+		break;	/* all handled by kafsasyncd */
+
+	default:
+		rxrpc_call_abort(call,ret); /* force kafsasyncd to handle it */
+		ret = 0;
+		break;
+	}
+
+ out:
+	rxrpc_put_call(call);
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_rxvl_get_entry_by_id_async() */
+
+/*****************************************************************************/
+/* attend to the asynchronous get VLDB entry by ID: zeroes *entry, then inspects op->call's state
+ * - COMPLETE: unmarshals the reply from call->app_scr_ptr; ERROR: returns call->app_errno; both drop op->call
+ * - any other state: returns -EAGAIN and leaves op->call in place */
+int afs_rxvl_get_entry_by_id_async2(afs_async_op_t *op,
+				    afsc_vldb_record_t *entry)
+{
+	unsigned *bp, tmp;
+	int loop, ret;
+
+	_enter("{op=%p cst=%u}",op,op->call->app_call_state);
+
+	memset(entry,0,sizeof(*entry));
+
+	if (op->call->app_call_state==RXRPC_CSTATE_COMPLETE) {
+		/* operation finished */
+		afs_kafsasyncd_terminate_op(op);
+
+		bp = op->call->app_scr_ptr;	/* buffer set up by afs_rxvl_get_entry_by_id_async() */
+
+		/* unmarshall the reply */
+		for (loop=0; loop<64; loop++)	/* name is read as 64 32-bit words, one per character */
+			entry->name[loop] = ntohl(*bp++);
+		bp++; /* final NUL */
+
+		bp++; /* type */
+		entry->nservers = ntohl(*bp++);	/* NOTE(review): server-supplied count is not range-checked here */
+
+		for (loop=0; loop<8; loop++)
+			entry->servers[loop].s_addr = *bp++;	/* stored as received (no ntohl) */
+
+		bp += 8; /* partition IDs */
+
+		for (loop=0; loop<8; loop++) {
+			tmp = ntohl(*bp++);
+			if (tmp & AFS_VLSF_RWVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RW;
+			if (tmp & AFS_VLSF_ROVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RO;
+			if (tmp & AFS_VLSF_BACKVOL) entry->srvtmask[loop] |= AFSC_VOL_STM_BAK;
+		}
+
+		entry->vid[0] = ntohl(*bp++);
+		entry->vid[1] = ntohl(*bp++);
+		entry->vid[2] = ntohl(*bp++);
+
+		bp++; /* clone ID */
+
+		tmp = ntohl(*bp++); /* flags */
+		if (tmp & AFS_VLF_RWEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RW;
+		if (tmp & AFS_VLF_ROEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RO;
+		if (tmp & AFS_VLF_BACKEXISTS) entry->vidmask |= AFSC_VOL_STM_BAK;
+
+		ret = -ENOMEDIUM;	/* no volume type flagged in the reply: abort and return -ENOMEDIUM */
+		if (!entry->vidmask) {
+			rxrpc_call_abort(op->call,ret);
+			goto done;
+		}
+
+#if 0 /* TODO: remove */
+		entry->nservers = 3;
+		entry->servers[0].s_addr = htonl(0xac101249);
+		entry->servers[1].s_addr = htonl(0xac101243);
+		entry->servers[2].s_addr = htonl(0xac10125b /*0xac10125b*/);
+
+		entry->srvtmask[0] = AFSC_VOL_STM_RO;
+		entry->srvtmask[1] = AFSC_VOL_STM_RO;
+		entry->srvtmask[2] = AFSC_VOL_STM_RO | AFSC_VOL_STM_RW;
+#endif
+
+		/* success */
+		entry->ctime = xtime.tv_sec;
+		ret = 0;
+		goto done;
+	}
+
+	if (op->call->app_call_state==RXRPC_CSTATE_ERROR) {
+		/* operation error */
+		ret = op->call->app_errno;	/* NOTE(review): afs_kafsasyncd_terminate_op() is not called on this path */
+		goto done;
+	}
+
+	_leave(" = -EAGAIN");
+	return -EAGAIN;	/* neither complete nor failed yet: the call is kept */
+
+ done:
+	rxrpc_put_call(op->call);	/* release the op's call and forget it */
+	op->call = NULL;
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_rxvl_get_entry_by_id_async2() */
+
+/*****************************************************************************/
+/*
+ * handle attention events on an async get-entry-by-ID op: on COMPLETE pass the op to kafsasyncd; in the reply states, log and abort with -EBADMSG if app_read_count is non-zero; BUG() on any other state
+ * - called from krxiod
+ */
+static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call)
+{
+	afs_async_op_t *op = call->app_user;	/* set by afs_rxvl_get_entry_by_id_async() */
+
+	_enter("{op=%p cst=%u}",op,call->app_call_state);
+
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_COMPLETE:
+		afs_kafsasyncd_attend_op(op);
+		break;
+	case RXRPC_CSTATE_CLNT_RCV_REPLY:
+		if (call->app_async_read)
+			break;	/* app_async_read set: ignore this event */
+	case RXRPC_CSTATE_CLNT_GOT_REPLY:	/* deliberate fall through from CLNT_RCV_REPLY above */
+		if (call->app_read_count==0)
+			break;
+		printk("kAFS: Reply bigger than expected {cst=%u asyn=%d mark=%d rdy=%u pr=%u%s}\n",
+		       call->app_call_state,
+		       call->app_async_read,
+		       call->app_mark,
+		       call->app_ready_qty,
+		       call->pkt_rcv_count,
+		       call->app_last_rcv ? " last" : "");
+
+		rxrpc_call_abort(call,-EBADMSG);
+		break;
+	default:
+		BUG();
+	}
+
+	_leave("");
+
+} /* end afs_rxvl_get_entry_by_id_attn() */
+
+/*****************************************************************************/
+/*
+ * handle error events on an async get-entry-by-ID op by handing the op to afs_kafsasyncd_attend_op()
+ * - called from krxiod
+ */
+static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call)
+{
+	afs_async_op_t *op = call->app_user;	/* set by afs_rxvl_get_entry_by_id_async() */
+
+	_enter("{op=%p cst=%u}",op,call->app_call_state);
+
+	afs_kafsasyncd_attend_op(op);
+
+	_leave("");
+
+} /* end afs_rxvl_get_entry_by_id_error() */
diff --git a/fs/afs/vlclient.h b/fs/afs/vlclient.h
new file mode 100644
index 000000000000..5791e04d6382
--- /dev/null
+++ b/fs/afs/vlclient.h
@@ -0,0 +1,95 @@
+/* vlclient.h: Volume Location Service client interface
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_VLCLIENT_H
+#define _LINUX_AFS_VLCLIENT_H
+
+#include "types.h"
+
+enum AFSVL_Errors {	/* VL service error codes, numbered consecutively from 363520 */
+	AFSVL_IDEXIST 		= 363520,	/* Volume Id entry exists in vl database */
+	AFSVL_IO 		= 363521,	/* I/O related error */
+	AFSVL_NAMEEXIST 	= 363522,	/* Volume name entry exists in vl database */
+	AFSVL_CREATEFAIL 	= 363523,	/* Internal creation failure */
+	AFSVL_NOENT 		= 363524,	/* No such entry */
+	AFSVL_EMPTY 		= 363525,	/* Vl database is empty */
+	AFSVL_ENTDELETED 	= 363526,	/* Entry is deleted (soft delete) */
+	AFSVL_BADNAME 		= 363527,	/* Volume name is illegal */
+	AFSVL_BADINDEX 		= 363528,	/* Index is out of range */
+	AFSVL_BADVOLTYPE 	= 363529,	/* Bad volume type */
+	AFSVL_BADSERVER 	= 363530,	/* Illegal server number (out of range) */
+	AFSVL_BADPARTITION 	= 363531,	/* Bad partition number */
+	AFSVL_REPSFULL 		= 363532,	/* Run out of space for Replication sites */
+	AFSVL_NOREPSERVER 	= 363533,	/* No such Replication server site exists */
+	AFSVL_DUPREPSERVER 	= 363534,	/* Replication site already exists */
+	AFSVL_RWNOTFOUND 	= 363535,	/* Parent R/W entry not found */
+	AFSVL_BADREFCOUNT 	= 363536,	/* Illegal Reference Count number */
+	AFSVL_SIZEEXCEEDED 	= 363537,	/* Vl size for attributes exceeded */
+	AFSVL_BADENTRY 		= 363538,	/* Bad incoming vl entry */
+	AFSVL_BADVOLIDBUMP 	= 363539,	/* Illegal max volid increment */
+	AFSVL_IDALREADYHASHED 	= 363540,	/* RO/BACK id already hashed */
+	AFSVL_ENTRYLOCKED 	= 363541,	/* Vl entry is already locked */
+	AFSVL_BADVOLOPER 	= 363542,	/* Bad volume operation code */
+	AFSVL_BADRELLOCKTYPE 	= 363543,	/* Bad release lock type */
+	AFSVL_RERELEASE 	= 363544,	/* Status report: last release was aborted */
+	AFSVL_BADSERVERFLAG 	= 363545,	/* Invalid replication site server flag */
+	AFSVL_PERM 		= 363546,	/* No permission access */
+	AFSVL_NOMEM 		= 363547,	/* malloc/realloc failed to alloc enough memory */
+};
+
+/* maps to "struct vldbentry" in vvl-spec.pdf; also defines the AFS_VLF_* and AFS_VLSF_* flag bits */
+struct  afsvl_dbentry {
+	char		name[65];		/* name of volume (including NUL char) */
+	afs_voltype_t	type;			/* volume type */
+	unsigned	num_servers;		/* num servers that hold instances of this vol */
+	unsigned	clone_id;		/* cloning ID */
+
+	unsigned	flags;			/* AFS_VLF_* bits, defined just below */
+#define AFS_VLF_RWEXISTS	0x1000		/* R/W volume exists */
+#define AFS_VLF_ROEXISTS	0x2000		/* R/O volume exists */
+#define AFS_VLF_BACKEXISTS	0x4000		/* backup volume exists */
+
+	afs_volid_t	volume_ids[3];		/* volume IDs */
+
+	struct {
+		struct in_addr	addr;		/* server address */
+		unsigned	partition;	/* partition ID on this server */
+		unsigned	flags;		/* server specific flags */
+#define AFS_VLSF_NEWREPSITE	0x0001	/* unused */
+#define AFS_VLSF_ROVOL		0x0002	/* this server holds a R/O instance of the volume */
+#define AFS_VLSF_RWVOL		0x0004	/* this server holds a R/W instance of the volume */
+#define AFS_VLSF_BACKVOL	0x0008	/* this server holds a backup instance of the volume */
+	} servers[8];				/* room for eight server entries */
+
+};
+
+/* probe a volume location server to see if it is still alive */
+extern int afs_rxvl_probe(afs_server_t *server, int alloc_flags);
+
+/* look up a volume location database entry by name */
+extern int afs_rxvl_get_entry_by_name(afs_server_t *server,
+				      const char *volname,
+				      afsc_vldb_record_t *entry);
+
+/* look up a volume location database entry by ID */
+extern int afs_rxvl_get_entry_by_id(afs_server_t *server,
+				    afs_volid_t	volid,
+				    afs_voltype_t voltype,
+				    afsc_vldb_record_t *entry);
+
+extern int afs_rxvl_get_entry_by_id_async(afs_async_op_t *op,
+					  afs_volid_t volid,
+					  afs_voltype_t voltype);	/* start an asynchronous lookup by ID on op */
+
+extern int afs_rxvl_get_entry_by_id_async2(afs_async_op_t *op,
+					   afsc_vldb_record_t *entry);	/* attend to that lookup; -EAGAIN while unfinished */
+
+#endif /* _LINUX_AFS_VLCLIENT_H */
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
new file mode 100644
index 000000000000..8d9f4d7e8f29
--- /dev/null
+++ b/fs/afs/vlocation.c
@@ -0,0 +1,824 @@
+/* vlocation.c: volume location management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "cell.h"
+#include "cmservice.h"
+#include "fsclient.h"
+#include "vlclient.h"
+#include "kafstimod.h"
+#include <rxrpc/connection.h>
+#include "internal.h"
+
+#define AFS_VLDB_TIMEOUT (HZ*1000)	/* timer delay of 1000*HZ ticks; parenthesised so it expands safely inside larger expressions */
+
+static void afs_vlocation_update_timer(afs_timer_t *timer);	/* .timed_out handler in afs_vlocation_update_timer_ops */
+static void afs_vlocation_update_attend(afs_async_op_t *op);	/* .attend handler in afs_vlocation_update_op_ops */
+static void afs_vlocation_update_discard(afs_async_op_t *op);	/* .discard handler in afs_vlocation_update_op_ops */
+
+static void __afs_vlocation_timeout(afs_timer_t *timer)	/* .timed_out handler for a record's "timeout" timer */
+{
+	afs_vlocation_t *vlocation = list_entry(timer,afs_vlocation_t,timeout);	/* timer is embedded in the record */
+
+	_debug("VL TIMEOUT [%s{u=%d}]",vlocation->vldb.name,atomic_read(&vlocation->usage));
+
+	afs_vlocation_do_timeout(vlocation);
+}
+
+static const struct afs_timer_ops afs_vlocation_timer_ops = {	/* installed on vlocation->timeout */
+	.timed_out	= __afs_vlocation_timeout,
+};
+
+static const struct afs_timer_ops afs_vlocation_update_timer_ops = {	/* installed on vlocation->upd_timer */
+	.timed_out	= afs_vlocation_update_timer,
+};
+
+static const struct afs_async_op_ops afs_vlocation_update_op_ops = {	/* installed on vlocation->upd_op */
+	.attend		= afs_vlocation_update_attend,
+	.discard	= afs_vlocation_update_discard,
+};
+
+static LIST_HEAD(afs_vlocation_update_pendq);	/* queue of VLs awaiting update (linked through upd_op.link) */
+static afs_vlocation_t *afs_vlocation_update;	/* VL currently being updated (NULL when none) */
+static spinlock_t afs_vlocation_update_lock = SPIN_LOCK_UNLOCKED; /* lock guarding update queue */
+
+/*****************************************************************************/
+/*
+ * iterate through the VL servers in a cell until one of them admits knowing about the volume in
+ * question; tries at most cell->vl_naddrs addresses, starting at cell->vl_curr_svix; returns 0 or <0
+ * - caller must have cell->vl_sem write-locked
+ */
+static int afs_vlocation_access_vl_by_name(afs_vlocation_t *vlocation,
+					   const char *name,
+					   afsc_vldb_record_t *vldb)
+{
+	afs_server_t *server = NULL;
+	afs_cell_t *cell = vlocation->cell;
+	int count, ret;
+
+	_enter("%s,%s,",cell->name,name);
+
+	ret = -ENOMEDIUM;	/* result if the cell has no VL addresses to try */
+	for (count=cell->vl_naddrs; count>0; count--) {
+		_debug("CellServ[%hu]: %08x",
+		       cell->vl_curr_svix,cell->vl_addrs[cell->vl_curr_svix].s_addr);
+
+		/* try and create a server */
+		ret = afs_server_lookup(cell,&cell->vl_addrs[cell->vl_curr_svix],&server);
+		switch (ret) {
+		case 0:
+			break;
+		case -ENOMEM:
+		case -ENONET:
+			goto out;	/* give up at once on these two errors */
+		default:
+			goto rotate;	/* any other failure: move on to the next address */
+		}
+
+		/* attempt to access the VL server */
+		ret = afs_rxvl_get_entry_by_name(server,name,vldb);
+		switch (ret) {
+		case 0:
+			afs_put_server(server);
+			goto out;
+		case -ENOMEM:
+		case -ENONET:
+		case -ENETUNREACH:
+		case -EHOSTUNREACH:
+		case -ECONNREFUSED:
+			down_write(&server->sem);	/* drop the server's cached vlserver connection */
+			if (server->vlserver) {
+				rxrpc_put_connection(server->vlserver);
+				server->vlserver = NULL;
+			}
+			up_write(&server->sem);
+			afs_put_server(server);
+			if (ret==-ENOMEM || ret==-ENONET)
+				goto out;
+			goto rotate;
+		case -ENOMEDIUM:
+			afs_put_server(server);
+			goto out;	/* -ENOMEDIUM is returned without trying the remaining servers */
+		default:
+			afs_put_server(server);
+			ret = -ENOMEDIUM;	/* other errors are reported as -ENOMEDIUM if no later server succeeds */
+			goto rotate;
+		}
+
+		/* rotate the server records upon lookup failure */
+	rotate:
+		cell->vl_curr_svix++;
+		cell->vl_curr_svix %= cell->vl_naddrs;
+	}
+
+ out:
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_vlocation_access_vl_by_name() */
+
+/*****************************************************************************/
+/*
+ * iterate through the VL servers in a cell until one of them admits knowing about the volume in
+ * question; tries at most cell->vl_naddrs addresses, starting at cell->vl_curr_svix; returns 0 or <0
+ * - caller must have cell->vl_sem write-locked
+ */
+static int afs_vlocation_access_vl_by_id(afs_vlocation_t *vlocation,
+					 afs_volid_t volid,
+					 afs_voltype_t voltype,
+					 afsc_vldb_record_t *vldb)
+{
+	afs_server_t *server = NULL;
+	afs_cell_t *cell = vlocation->cell;
+	int count, ret;
+
+	_enter("%s,%x,%d,",cell->name,volid,voltype);
+
+	ret = -ENOMEDIUM;	/* result if the cell has no VL addresses to try */
+	for (count=cell->vl_naddrs; count>0; count--) {
+		_debug("CellServ[%hu]: %08x",
+		       cell->vl_curr_svix,cell->vl_addrs[cell->vl_curr_svix].s_addr);
+
+		/* try and create a server */
+		ret = afs_server_lookup(cell,&cell->vl_addrs[cell->vl_curr_svix],&server);
+		switch (ret) {
+		case 0:
+			break;
+		case -ENOMEM:
+		case -ENONET:
+			goto out;	/* give up at once on these two errors */
+		default:
+			goto rotate;	/* any other failure: move on to the next address */
+		}
+
+		/* attempt to access the VL server */
+		ret = afs_rxvl_get_entry_by_id(server,volid,voltype,vldb);
+		switch (ret) {
+		case 0:
+			afs_put_server(server);
+			goto out;
+		case -ENOMEM:
+		case -ENONET:
+		case -ENETUNREACH:
+		case -EHOSTUNREACH:
+		case -ECONNREFUSED:
+			down_write(&server->sem);	/* drop the server's cached vlserver connection */
+			if (server->vlserver) {
+				rxrpc_put_connection(server->vlserver);
+				server->vlserver = NULL;
+			}
+			up_write(&server->sem);
+			afs_put_server(server);
+			if (ret==-ENOMEM || ret==-ENONET)
+				goto out;
+			goto rotate;
+		case -ENOMEDIUM:
+			afs_put_server(server);
+			goto out;	/* -ENOMEDIUM is returned without trying the remaining servers */
+		default:
+			afs_put_server(server);
+			ret = -ENOMEDIUM;	/* other errors are reported as -ENOMEDIUM if no later server succeeds */
+			goto rotate;
+		}
+
+		/* rotate the server records upon lookup failure */
+	rotate:
+		cell->vl_curr_svix++;
+		cell->vl_curr_svix %= cell->vl_naddrs;
+	}
+
+ out:
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_vlocation_access_vl_by_id() */
+
+/*****************************************************************************/
+/*
+ * look up the location record for volume @name in @cell, returning it through *_vlocation
+ * - caller must have cell->vol_sem write-locked
+ * - a record already on cell->vl_list or in the graveyard is reused and refreshed by volume ID;
+ *   otherwise a new record is allocated and the cell's VL servers are queried by name
+ * - the local-cache lookup/update steps are currently compiled out (#if 0)
+ * - returns 0 with a ref held on the record, or a negative error (e.g. -ENAMETOOLONG, -ENOMEM)
+ */
+int afs_vlocation_lookup(afs_cell_t *cell, const char *name, afs_vlocation_t **_vlocation)
+{
+	afsc_vldb_record_t vldb;
+	struct list_head *_p;
+	afs_vlocation_t *vlocation;
+	afs_voltype_t voltype;
+	afs_volid_t vid;
+	int active = 0, ret;
+
+	_enter(",%s,%s,",cell->name,name);
+
+	if (strlen(name)>sizeof(vlocation->vldb.name)) {
+		_leave(" = -ENAMETOOLONG");
+		return -ENAMETOOLONG;
+	}
+
+	/* search the cell's active list first */
+	list_for_each(_p,&cell->vl_list) {
+		vlocation = list_entry(_p,afs_vlocation_t,link);
+		if (strncmp(vlocation->vldb.name,name,sizeof(vlocation->vldb.name))==0)
+			goto found_in_memory;
+	}
+
+	/* search the cell's graveyard list second */
+	spin_lock(&cell->vl_gylock);
+	list_for_each(_p,&cell->vl_graveyard) {
+		vlocation = list_entry(_p,afs_vlocation_t,link);
+		if (strncmp(vlocation->vldb.name,name,sizeof(vlocation->vldb.name))==0)
+			goto found_in_graveyard;
+	}
+	spin_unlock(&cell->vl_gylock);
+
+	/* not in the cell's in-memory lists - create a new record */
+	vlocation = kmalloc(sizeof(afs_vlocation_t),GFP_KERNEL);
+	if (!vlocation)
+		return -ENOMEM;
+
+	memset(vlocation,0,sizeof(afs_vlocation_t));
+	atomic_set(&vlocation->usage,1);
+	INIT_LIST_HEAD(&vlocation->link);
+	rwlock_init(&vlocation->lock);
+	strncpy(vlocation->vldb.name,name,sizeof(vlocation->vldb.name));	/* a full-length name is left without a NUL */
+
+	afs_timer_init(&vlocation->timeout,&afs_vlocation_timer_ops);
+	afs_timer_init(&vlocation->upd_timer,&afs_vlocation_update_timer_ops);
+	afs_async_op_init(&vlocation->upd_op,&afs_vlocation_update_op_ops);
+
+	INIT_LIST_HEAD(&vlocation->caches);
+
+	afs_get_cell(cell);
+	vlocation->cell = cell;
+
+	list_add_tail(&vlocation->link,&cell->vl_list);
+
+#if 0
+	/* search local cache if wasn't in memory */
+	ret = afsc_lookup_vlocation(vlocation);
+	switch (ret) {
+	default:	goto error;		/* disk error */
+	case 0:		goto found_in_cache;	/* pulled from local cache into memory */
+	case -ENOENT:	break;			/* not in local cache */
+	}
+#endif
+
+	/* try to look up an unknown volume in the cell VL databases by name */
+	ret = afs_vlocation_access_vl_by_name(vlocation,name,&vldb);
+	if (ret<0) {
+		printk("kAFS: failed to locate '%s' in cell '%s'\n",name,cell->name);
+		goto error;
+	}
+
+	goto found_on_vlserver;
+
+ found_in_graveyard:
+	/* found in the graveyard - resurrect */
+	_debug("found in graveyard");
+	atomic_inc(&vlocation->usage);
+	list_del(&vlocation->link);
+	list_add_tail(&vlocation->link,&cell->vl_list);
+	spin_unlock(&cell->vl_gylock);
+
+	afs_kafstimod_del_timer(&vlocation->timeout);
+	goto active;
+
+ found_in_memory:
+	/* found in memory - check to see if it's active */
+	_debug("found in memory");
+	atomic_inc(&vlocation->usage);
+
+ active:
+	active = 1;	/* tells the error path to drop our ref rather than free the record */
+
+/* found_in_cache: */
+	/* try to look up a cached volume in the cell VL databases by ID */
+	_debug("found in cache");
+
+	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
+	       vlocation->vldb.name,
+	       vlocation->vldb.vidmask,
+	       ntohl(vlocation->vldb.servers[0].s_addr),vlocation->vldb.srvtmask[0],
+	       ntohl(vlocation->vldb.servers[1].s_addr),vlocation->vldb.srvtmask[1],
+	       ntohl(vlocation->vldb.servers[2].s_addr),vlocation->vldb.srvtmask[2]
+	       );
+
+	_debug("Vids: %08x %08x %08x",
+	       vlocation->vldb.vid[0],vlocation->vldb.vid[1],vlocation->vldb.vid[2]);
+
+	if (vlocation->vldb.vidmask & AFSC_VOL_STM_RW) {	/* pick an ID to query by: R/W, else R/O, else backup */
+		vid = vlocation->vldb.vid[0];
+		voltype = AFSVL_RWVOL;
+	}
+	else if (vlocation->vldb.vidmask & AFSC_VOL_STM_RO) {
+		vid = vlocation->vldb.vid[1];
+		voltype = AFSVL_ROVOL;
+	}
+	else if (vlocation->vldb.vidmask & AFSC_VOL_STM_BAK) {
+		vid = vlocation->vldb.vid[2];
+		voltype = AFSVL_BACKVOL;
+	}
+	else {
+		BUG();
+		vid = 0;
+		voltype = 0;
+	}
+
+	ret = afs_vlocation_access_vl_by_id(vlocation,vid,voltype,&vldb);
+	switch (ret) {
+		/* net error */
+	default:
+		printk("kAFS: failed to look volume '%s' (%x) up in '%s': %d\n",
+		       name,vid,cell->name,ret);
+		goto error;
+
+		/* a VL server returned the entry for this volume ID */
+	case 0:
+		goto found_on_vlserver;
+
+		/* uh oh... looks like the volume got deleted */
+	case -ENOMEDIUM:
+		printk("kAFS: volume '%s' (%x) does not exist in '%s'\n",name,vid,cell->name);
+
+		/* TODO: make existing record unavailable */
+		goto error;
+	}
+
+ found_on_vlserver:
+	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
+	       name,
+	       vldb.vidmask,
+	       ntohl(vldb.servers[0].s_addr),vldb.srvtmask[0],
+	       ntohl(vldb.servers[1].s_addr),vldb.srvtmask[1],
+	       ntohl(vldb.servers[2].s_addr),vldb.srvtmask[2]
+	       );
+
+	_debug("Vids: %08x %08x %08x",vldb.vid[0],vldb.vid[1],vldb.vid[2]);
+
+	if (strncmp(vldb.name,name,sizeof(vlocation->vldb.name))!=0)
+		printk("kAFS: name of volume '%s' changed to '%s' on server\n",name,vldb.name);
+
+	memcpy(&vlocation->vldb,&vldb,sizeof(vlocation->vldb));	/* adopt the server's record wholesale */
+
+#if 0
+	/* add volume entry to local cache */
+	ret = afsc_update_vlocation(vlocation);
+	if (ret<0)
+		goto error;
+#endif
+
+	afs_kafstimod_add_timer(&vlocation->upd_timer,10*HZ);	/* schedule the first update in 10*HZ ticks */
+
+	*_vlocation = vlocation;
+	_leave(" = 0 (%p)",vlocation);
+	return 0;
+
+ error:
+	if (vlocation) {
+		if (active) {
+			__afs_put_vlocation(vlocation);
+		}
+		else {
+			list_del(&vlocation->link);	/* freshly allocated record: unlink and free it */
+			afs_put_cell(vlocation->cell);
+#if 0
+			afs_put_cache(vlocation->cache);
+#endif
+			kfree(vlocation);
+		}
+	}
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_vlocation_lookup() */
+
+/*****************************************************************************/
+/*
+ * finish using a volume location record: drop one ref; on the last ref move it to the cell's graveyard and start its 10*HZ timeout
+ * - caller must have cell->vol_sem write-locked (NOTE(review): afs_put_vlocation() takes cell->vl_sem around this call - confirm which is meant)
+ */
+void __afs_put_vlocation(afs_vlocation_t *vlocation)
+{
+	afs_cell_t *cell = vlocation->cell;
+
+	_enter("%s",vlocation->vldb.name);
+
+	/* sanity check */
+	if (atomic_read(&vlocation->usage)<=0)
+		BUG();
+
+	spin_lock(&cell->vl_gylock);
+	if (likely(!atomic_dec_and_test(&vlocation->usage))) {	/* other refs remain: nothing more to do */
+		spin_unlock(&cell->vl_gylock);
+		_leave("");
+		return;
+	}
+
+	/* move to graveyard queue */
+	list_del(&vlocation->link);
+	list_add_tail(&vlocation->link,&cell->vl_graveyard);
+
+	/* remove from pending timeout queue (refcounted if actually being updated) */
+	list_del_init(&vlocation->upd_op.link);
+
+	/* time out in 10 secs */
+	afs_kafstimod_del_timer(&vlocation->upd_timer);	/* stop the update timer; only the timeout timer runs from here */
+	afs_kafstimod_add_timer(&vlocation->timeout,10*HZ);
+
+	spin_unlock(&cell->vl_gylock);
+
+	_leave(" [killed]");
+} /* end __afs_put_vlocation() */
+
+/*****************************************************************************/
+/*
+ * finish using a volume location record: wraps __afs_put_vlocation() in a write-lock of cell->vl_sem
+ */
+void afs_put_vlocation(afs_vlocation_t *vlocation)
+{
+	afs_cell_t *cell = vlocation->cell;	/* read before the put, so the unlock does not go through vlocation */
+
+	down_write(&cell->vl_sem);
+	__afs_put_vlocation(vlocation);
+	up_write(&cell->vl_sem);
+} /* end afs_put_vlocation() */
+
+/*****************************************************************************/
+/*
+ * timeout vlocation record: if it is still unused, unlink it, drop its cell ref and free it
+ * - removes from the cell's graveyard if the usage count is zero
+ */
+void afs_vlocation_do_timeout(afs_vlocation_t *vlocation)
+{
+	afs_cell_t *cell;
+
+	_enter("%s",vlocation->vldb.name);
+
+	cell = vlocation->cell;
+
+	if (atomic_read(&vlocation->usage)<0) BUG();
+
+	/* remove from graveyard if still dead */
+	spin_lock(&cell->vl_gylock);
+	if (atomic_read(&vlocation->usage)==0)
+		list_del_init(&vlocation->link);
+	else
+		vlocation = NULL;	/* usage is non-zero: leave the record alone */
+	spin_unlock(&cell->vl_gylock);
+
+	if (!vlocation) {
+		_leave("");
+		return; /* resurrected */
+	}
+
+	/* we can now destroy it properly */
+	afs_put_cell(cell);	/* releases the cell ref taken when the record was created */
+#if 0
+	afs_put_cache(vlocation->cache);
+#endif
+
+	kfree(vlocation);
+
+	_leave(" [destroyed]");
+} /* end afs_vlocation_do_timeout() */
+
+/*****************************************************************************/
+/*
+ * send an update operation to the currently selected server (cell->vl_addrs[vlocation->upd_curr_svix]); returns the result of the server lookup or of starting the async VL call
+ */
+static int afs_vlocation_update_begin(afs_vlocation_t *vlocation)
+{
+	afs_voltype_t voltype;
+	afs_volid_t vid;
+	int ret;
+
+	_enter("%s{ufs=%u ucs=%u}",
+	       vlocation->vldb.name,vlocation->upd_first_svix,vlocation->upd_curr_svix);
+
+	/* try to look up a cached volume in the cell VL databases by ID */
+	if (vlocation->vldb.vidmask & AFSC_VOL_STM_RW) {	/* pick an ID to query by: R/W, else R/O, else backup */
+		vid = vlocation->vldb.vid[0];
+		voltype = AFSVL_RWVOL;
+	}
+	else if (vlocation->vldb.vidmask & AFSC_VOL_STM_RO) {
+		vid = vlocation->vldb.vid[1];
+		voltype = AFSVL_ROVOL;
+	}
+	else if (vlocation->vldb.vidmask & AFSC_VOL_STM_BAK) {
+		vid = vlocation->vldb.vid[2];
+		voltype = AFSVL_BACKVOL;
+	}
+	else {
+		BUG();
+		vid = 0;
+		voltype = 0;
+	}
+
+	/* contact the chosen server */
+	ret = afs_server_lookup(vlocation->cell,
+				&vlocation->cell->vl_addrs[vlocation->upd_curr_svix],
+				&vlocation->upd_op.server);	/* server is kept in upd_op; afs_vlocation_update_abandon() puts it */
+	switch (ret) {
+	case 0:
+		break;
+	case -ENOMEM:
+	case -ENONET:
+	default:	/* every failure is returned to the caller unchanged */
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	/* initiate the update operation */
+	ret = afs_rxvl_get_entry_by_id_async(&vlocation->upd_op,vid,voltype);
+	if (ret<0) {
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_vlocation_update_begin() */
+
+/*****************************************************************************/
+/*
+ * abandon updating a VL record: release upd_op.server, clear afs_vlocation_update and set upd_state to @state (@ret is only used for the log message)
+ * - does not restart the update timer
+ */
+static void afs_vlocation_update_abandon(afs_vlocation_t *vlocation,
+					 afs_vlocation_upd_t state,
+					 int ret)
+{
+	_enter("%s,%u",vlocation->vldb.name,state);
+
+	if (ret<0)
+		printk("kAFS: Abandoning VL update '%s': %d\n",vlocation->vldb.name,ret);
+
+	/* discard the server record */
+	if (vlocation->upd_op.server) {
+		afs_put_server(vlocation->upd_op.server);
+		vlocation->upd_op.server = NULL;
+	}
+
+	spin_lock(&afs_vlocation_update_lock);
+	afs_vlocation_update = NULL;	/* no update is in progress any more */
+	vlocation->upd_state = state;
+
+	/* TODO: start updating next VL record on pending list */
+
+	spin_unlock(&afs_vlocation_update_lock);
+
+	_leave("");
+} /* end afs_vlocation_update_abandon() */
+
+/*****************************************************************************/
+/*
+ * handle periodic update timeouts and busy retry timeouts: start an update now, or queue the record if another update is in progress
+ * - called from kafstimod
+ */
+static void afs_vlocation_update_timer(afs_timer_t *timer)
+{
+	afs_vlocation_t *vlocation = list_entry(timer,afs_vlocation_t,upd_timer);	/* timer is embedded in the record */
+	int ret;
+
+	_enter("%s",vlocation->vldb.name);
+
+	/* only update if not in the graveyard (defend against putting too) */
+	spin_lock(&vlocation->cell->vl_gylock);
+
+	if (!atomic_read(&vlocation->usage))
+		goto out_unlock1;
+
+	spin_lock(&afs_vlocation_update_lock);	/* taken inside cell->vl_gylock */
+
+	/* if we were woken up due to EBUSY sleep then restart immediately if possible or else jump
+	 * to front of pending queue */
+	if (vlocation->upd_state==AFS_VLUPD_BUSYSLEEP) {
+		if (afs_vlocation_update) {
+			list_add(&vlocation->upd_op.link,&afs_vlocation_update_pendq);
+		}
+		else {
+			afs_get_vlocation(vlocation);
+			afs_vlocation_update = vlocation;
+			vlocation->upd_state = AFS_VLUPD_INPROGRESS;
+		}
+		goto out_unlock2;	/* NOTE(review): this path never calls afs_vlocation_update_begin() here - confirm who restarts it */
+	}
+
+	/* put on pending queue if there's already another update in progress */
+	if (afs_vlocation_update) {
+		vlocation->upd_state = AFS_VLUPD_PENDING;
+		list_add_tail(&vlocation->upd_op.link,&afs_vlocation_update_pendq);
+		goto out_unlock2;
+	}
+
+	/* hold a ref on it while actually updating */
+	afs_get_vlocation(vlocation);
+	afs_vlocation_update = vlocation;
+	vlocation->upd_state = AFS_VLUPD_INPROGRESS;
+
+	spin_unlock(&afs_vlocation_update_lock);
+	spin_unlock(&vlocation->cell->vl_gylock);
+
+	/* okay... we can start the update */
+	_debug("BEGIN VL UPDATE [%s]",vlocation->vldb.name);
+	vlocation->upd_first_svix = vlocation->cell->vl_curr_svix;	/* start from the cell's current VL server index */
+	vlocation->upd_curr_svix = vlocation->upd_first_svix;
+	vlocation->upd_rej_cnt = 0;
+	vlocation->upd_busy_cnt = 0;
+
+	ret = afs_vlocation_update_begin(vlocation);
+	if (ret<0) {	/* could not start: go back to sleep, re-arm the timer and drop the ref taken above */
+		afs_vlocation_update_abandon(vlocation,AFS_VLUPD_SLEEP,ret);
+		afs_kafstimod_add_timer(&vlocation->upd_timer,AFS_VLDB_TIMEOUT);
+		afs_put_vlocation(vlocation);
+	}
+
+	_leave("");
+	return;
+
+ out_unlock2:
+	spin_unlock(&afs_vlocation_update_lock);
+ out_unlock1:
+	spin_unlock(&vlocation->cell->vl_gylock);
+	_leave("");
+	return;
+
+} /* end afs_vlocation_update_timer() */
+
+/*****************************************************************************/
+/*
+ * attend to an update operation upon which an event happened
+ * - called in kafsasyncd context
+ */
+static void afs_vlocation_update_attend(afs_async_op_t *op)
+{
+	afsc_vldb_record_t vldb;
+	afs_vlocation_t *vlocation = list_entry(op,afs_vlocation_t,upd_op);
+	unsigned tmp;
+	int ret;
+
+	_enter("%s",vlocation->vldb.name);
+
+	ret = afs_rxvl_get_entry_by_id_async2(op,&vldb);
+	switch (ret) {
+	case -EAGAIN:
+		_leave(" [unfinished]");
+		return;
+
+	case 0:
+		_debug("END VL UPDATE: %d\n",ret);
+		vlocation->valid = 1;
+
+		_debug("Done VL Lookup: %02x { %08x(%x) %08x(%x) %08x(%x) }",
+		       vldb.vidmask,
+		       ntohl(vldb.servers[0].s_addr),vldb.srvtmask[0],
+		       ntohl(vldb.servers[1].s_addr),vldb.srvtmask[1],
+		       ntohl(vldb.servers[2].s_addr),vldb.srvtmask[2]);
+
+		_debug("Vids: %08x %08x %08x",vldb.vid[0],vldb.vid[1],vldb.vid[2]);
+
+		afs_vlocation_update_abandon(vlocation,AFS_VLUPD_SLEEP,0);
+
+		down_write(&vlocation->cell->vl_sem);
+
+		/* actually update the cache */
+		if (strncmp(vldb.name,vlocation->vldb.name,sizeof(vlocation->vldb.name))!=0)
+			printk("kAFS: name of volume '%s' changed to '%s' on server\n",
+			       vlocation->vldb.name,vldb.name);
+
+		memcpy(&vlocation->vldb,&vldb,sizeof(vlocation->vldb));
+
+#if 0
+		/* add volume entry to local cache */
+		ret = afsc_update_vlocation(vlocation);
+#endif
+
+		up_write(&vlocation->cell->vl_sem);
+		/* ret can only be negative here once the disabled cache update above is enabled */
+		if (ret<0)
+			printk("kAFS: failed to update local cache: %d\n",ret);
+
+		afs_kafstimod_add_timer(&vlocation->upd_timer,AFS_VLDB_TIMEOUT);
+		afs_put_vlocation(vlocation);
+		_leave(" [found]");
+		return;
+
+	case -ENOMEDIUM:
+		vlocation->upd_rej_cnt++;
+		goto try_next;
+
+		/* the server is locked - retry in a very short while */
+	case -EBUSY:
+		vlocation->upd_busy_cnt++;
+		if (vlocation->upd_busy_cnt>3)
+			goto try_next; /* too many retries */
+
+		afs_vlocation_update_abandon(vlocation,AFS_VLUPD_BUSYSLEEP,0);
+		afs_kafstimod_add_timer(&vlocation->upd_timer,HZ/2);
+		afs_put_vlocation(vlocation);
+		_leave(" [busy]");
+		return;
+
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -ECONNREFUSED:
+	case -EREMOTEIO:
+		/* record bad vlserver info in the cell too by moving the cell's preferred server
+		 * on by one (skipped if the cell has no addresses, which would divide by zero)
+		 * - TODO: use down_write_trylock() if available */
+		tmp = vlocation->cell->vl_naddrs;
+		if (tmp>0 && vlocation->upd_curr_svix == vlocation->cell->vl_curr_svix)
+			vlocation->cell->vl_curr_svix = (vlocation->upd_curr_svix + 1) % tmp;
+		/* fall through */
+	case -EBADRQC:
+	case -EINVAL:
+	case -EACCES:
+	case -EBADMSG:
+		goto try_next;
+
+	default:
+		goto abandon;
+	}
+
+	/* try contacting the next server */
+ try_next:
+	vlocation->upd_busy_cnt = 0;
+
+	if (vlocation->upd_op.server) {
+		/* discard the server record */
+		afs_put_server(vlocation->upd_op.server);
+		vlocation->upd_op.server = NULL;
+	}
+
+	tmp = vlocation->cell->vl_naddrs;
+	if (tmp==0)
+		goto abandon;
+
+	vlocation->upd_curr_svix++;
+	if (vlocation->upd_curr_svix >= tmp) vlocation->upd_curr_svix = 0;
+	if (vlocation->upd_first_svix >= tmp) vlocation->upd_first_svix = tmp - 1;
+
+	/* move to the next server */
+	if (vlocation->upd_curr_svix!=vlocation->upd_first_svix) {
+		ret = afs_vlocation_update_begin(vlocation);
+		if (ret<0) goto abandon; /* couldn't start - don't leave the update hanging */
+		_leave(" [next]");
+		return;
+	}
+
+	/* run out of servers to try - was the volume rejected? */
+	if (vlocation->upd_rej_cnt>0) {
+		printk("kAFS: Active volume no longer valid '%s'\n",vlocation->vldb.name);
+		vlocation->valid = 0;
+		afs_vlocation_update_abandon(vlocation,AFS_VLUPD_SLEEP,0);
+		afs_kafstimod_add_timer(&vlocation->upd_timer,AFS_VLDB_TIMEOUT);
+		afs_put_vlocation(vlocation);
+		_leave(" [invalidated]");
+		return;
+	}
+
+	/* abandon the update */
+ abandon:
+	afs_vlocation_update_abandon(vlocation,AFS_VLUPD_SLEEP,ret);
+	afs_kafstimod_add_timer(&vlocation->upd_timer,HZ*10);
+	afs_put_vlocation(vlocation);
+	_leave(" [abandoned]");
+
+} /* end afs_vlocation_update_attend() */
+
+/*****************************************************************************/
+/*
+ * deal with an update operation being discarded
+ * - called in kafsasyncd context when it's dying due to rmmod
+ * - the call has already been aborted and put()'d
+ */
+static void afs_vlocation_update_discard(afs_async_op_t *op)
+{
+	afs_vlocation_t *vlocation = list_entry(op,afs_vlocation_t,upd_op);
+
+	_enter("%s",vlocation->vldb.name);
+	/* NOTE(review): op->server is not NULL-checked here, unlike the guarded puts in the
+	 * abandon and attend paths - confirm it is always set while the op is queued */
+	afs_put_server(op->server);
+	op->server = NULL;
+	/* drop a ref on the VL record that embeds this op */
+	afs_put_vlocation(vlocation);
+	_leave("");
+} /* end afs_vlocation_update_discard() */
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
new file mode 100644
index 000000000000..ab2d7b241fc3
--- /dev/null
+++ b/fs/afs/vnode.c
@@ -0,0 +1,316 @@
+/* vnode.c: AFS vnode management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "cell.h"
+#include "cmservice.h"
+#include "fsclient.h"
+#include "vlclient.h"
+#include "vnode.h"
+#include "internal.h"
+
+static void afs_vnode_cb_timed_out(struct afs_timer *timer);
+/* timer ops whose expiry handler treats the timer as a vnode's cb_timeout member */
+struct afs_timer_ops afs_vnode_cb_timed_out_ops = {
+	.timed_out	= afs_vnode_cb_timed_out,
+};
+
+/*****************************************************************************/
+/*
+ * handle a callback timing out
+ * TODO: retain a ref to vnode struct for an outstanding callback timeout
+ */
+static void afs_vnode_cb_timed_out(struct afs_timer *timer)
+{
+	afs_server_t *oldserver;
+	afs_vnode_t *vnode;
+	/* the timer is embedded in the vnode as cb_timeout */
+	vnode = list_entry(timer,afs_vnode_t,cb_timeout);
+
+	_enter("%p",vnode);
+
+	/* set the changed flag in the vnode and release the server */
+	spin_lock(&vnode->lock);
+
+	oldserver = xchg(&vnode->cb_server,NULL);
+	if (oldserver) {
+		vnode->flags |= AFS_VNODE_CHANGED;
+		/* unhook from the master callback hash */
+		spin_lock(&afs_cb_hash_lock);
+		list_del_init(&vnode->cb_hash_link);
+		spin_unlock(&afs_cb_hash_lock);
+		/* unhook from the server's list of promises */
+		spin_lock(&oldserver->cb_lock);
+		list_del_init(&vnode->cb_link);
+		spin_unlock(&oldserver->cb_lock);
+	}
+
+	spin_unlock(&vnode->lock);
+	/* the ref taken over from cb_server is dropped after the vnode lock is released */
+	if (oldserver)
+		afs_put_server(oldserver);
+
+	_leave("");
+} /* end afs_vnode_cb_timed_out() */
+
+/*****************************************************************************/
+/*
+ * finish off updating the recorded status of a file
+ * - starts callback expiry timer
+ * - adds to server's callback list
+ */
+void afs_vnode_finalise_status_update(afs_vnode_t *vnode, afs_server_t *server, int ret)
+{
+	afs_server_t *oldserver = NULL;
+	/* oldserver is left non-NULL only when a server ref has to be dropped on the way out */
+	_enter("%p,%p,%d",vnode,server,ret);
+
+	spin_lock(&vnode->lock);
+	/* NOTE(review): CHANGED is cleared whatever ret is, even after a failed fetch - confirm */
+	vnode->flags &= ~AFS_VNODE_CHANGED;
+
+	if (ret==0) {
+		/* adjust the callback timeout appropriately */
+		afs_kafstimod_add_timer(&vnode->cb_timeout,vnode->cb_expiry*HZ);
+		/* rehash the vnode in the master callback hash under the server just used */
+		spin_lock(&afs_cb_hash_lock);
+		list_del(&vnode->cb_hash_link);
+		list_add_tail(&vnode->cb_hash_link,&afs_cb_hash(server,&vnode->fid));
+		spin_unlock(&afs_cb_hash_lock);
+
+		/* swap ref to old callback server with that for new callback server */
+		oldserver = xchg(&vnode->cb_server,server);
+		if (oldserver!=server) {
+			if (oldserver) {
+				spin_lock(&oldserver->cb_lock);
+				list_del_init(&vnode->cb_link);
+				spin_unlock(&oldserver->cb_lock);
+			}
+			/* take a ref for cb_server and join the new server's list of promises */
+			afs_get_server(server);
+			spin_lock(&server->cb_lock);
+			list_add_tail(&vnode->cb_link,&server->cb_promises);
+			spin_unlock(&server->cb_lock);
+		}
+		else {
+			/* same server */
+			oldserver = NULL;
+		}
+	}
+	else if (ret==-ENOENT) {
+		/* the file was deleted - clear the callback timeout */
+		oldserver = xchg(&vnode->cb_server,NULL);
+		afs_kafstimod_del_timer(&vnode->cb_timeout);
+		/* NOTE(review): cb_link and cb_hash_link are not unlinked on this path - confirm */
+		_debug("got NOENT from server - marking file deleted");
+		vnode->flags |= AFS_VNODE_DELETED;
+	}
+	/* this op no longer counts as an outstanding status update */
+	vnode->update_cnt--;
+
+	spin_unlock(&vnode->lock);
+
+	wake_up_all(&vnode->update_waitq);
+
+	if (oldserver)
+		afs_put_server(oldserver);
+
+	_leave("");
+
+} /* end afs_vnode_finalise_status_update() */
+
+/*****************************************************************************/
+/*
+ * fetch file status from the volume
+ * - don't issue a fetch if:
+ *   - the changed bit is not set and there's a valid callback
+ *   - there are any outstanding ops that will fetch the status
+ * - TODO implement local caching
+ */
+int afs_vnode_fetch_status(afs_vnode_t *vnode)
+{
+	afs_server_t *server;
+	int ret;
+	DECLARE_WAITQUEUE(myself,current);
+
+	_enter("%s,{%u,%u,%u}",vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,vnode->fid.vnode,vnode->fid.unique);
+
+	if (!(vnode->flags & AFS_VNODE_CHANGED) && vnode->cb_server) {
+		_leave(" [unchanged]");
+		return 0;
+	}
+
+	if (vnode->flags & AFS_VNODE_DELETED) {
+		_leave(" [deleted]");
+		return -ENOENT;
+	}
+
+	spin_lock(&vnode->lock);
+
+	if (!(vnode->flags & AFS_VNODE_CHANGED)) {
+		spin_unlock(&vnode->lock);
+		_leave(" [unchanged]");
+		return 0;
+	}
+
+	if (vnode->update_cnt>0) {
+		/* someone else started a fetch */
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&vnode->update_waitq,&myself);
+
+		/* wait for the status to be updated */
+		for (;;) {
+			if (!(vnode->flags & AFS_VNODE_CHANGED))	break;
+			if (vnode->flags & AFS_VNODE_DELETED)		break;
+
+			/* it got updated and invalidated all before we saw it */
+			if (vnode->update_cnt==0) {
+				remove_wait_queue(&vnode->update_waitq,&myself);
+				set_current_state(TASK_RUNNING);
+				goto get_anyway;
+			}
+
+			spin_unlock(&vnode->lock);
+			schedule();
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			spin_lock(&vnode->lock);
+		}
+
+		remove_wait_queue(&vnode->update_waitq,&myself);
+		spin_unlock(&vnode->lock);
+		set_current_state(TASK_RUNNING);
+
+		return vnode->flags & AFS_VNODE_DELETED ? -ENOENT : 0;
+	}
+
+ get_anyway:
+	/* okay... we're going to have to initiate the op */
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+
+	/* merge AFS status fetches and clear outstanding callback on this vnode */
+	do {
+		/* pick a server to query */
+		ret = afs_volume_pick_fileserver(vnode->volume,&server);
+		if (ret<0) {
+			/* no op was issued: withdraw our claim and wake any waiters */
+			spin_lock(&vnode->lock);
+			vnode->update_cnt--;
+			spin_unlock(&vnode->lock);
+			wake_up_all(&vnode->update_waitq);
+			return ret;
+		}
+
+		_debug("USING SERVER: %08x\n",ntohl(server->addr.s_addr));
+		ret = afs_rxfs_fetch_file_status(server,vnode,NULL);
+	} while (!afs_volume_release_fileserver(vnode->volume,server,ret));
+
+	/* adjust the flags */
+	afs_vnode_finalise_status_update(vnode,server,ret);
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_vnode_fetch_status() */
+
+/*****************************************************************************/
+/*
+ * fetch file data from the volume
+ * - TODO implement caching and server failover
+ */
+int afs_vnode_fetch_data(afs_vnode_t *vnode, struct afs_rxfs_fetch_descriptor *desc)
+{
+	afs_server_t *server;
+	int ret;
+
+	_enter("%s,{%u,%u,%u}",vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,vnode->fid.vnode,vnode->fid.unique);
+
+	/* this op will fetch the status */
+	spin_lock(&vnode->lock);
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+
+	/* merge in AFS status fetches and clear outstanding callback on this vnode */
+	do {
+		/* pick a server to query */
+		ret = afs_volume_pick_fileserver(vnode->volume,&server);
+		if (ret<0) {
+			/* no op was issued: withdraw our claim and wake any waiters */
+			spin_lock(&vnode->lock);
+			vnode->update_cnt--;
+			spin_unlock(&vnode->lock);
+			wake_up_all(&vnode->update_waitq);
+			return ret;
+		}
+
+		_debug("USING SERVER: %08x\n",ntohl(server->addr.s_addr));
+		ret = afs_rxfs_fetch_file_data(server,vnode,desc,NULL);
+	} while (!afs_volume_release_fileserver(vnode->volume,server,ret));
+
+	/* adjust the flags */
+	afs_vnode_finalise_status_update(vnode,server,ret);
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_vnode_fetch_data() */
+
+/*****************************************************************************/
+/*
+ * break any outstanding callback on a vnode
+ * - only relevant to the server that issued it
+ */
+int afs_vnode_give_up_callback(afs_vnode_t *vnode)
+{
+	afs_server_t *server;
+	int ret;
+
+	_enter("%s,{%u,%u,%u}",
+	       vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique);
+	/* take the vnode out of the master callback hash */
+	spin_lock(&afs_cb_hash_lock);
+	list_del_init(&vnode->cb_hash_link);
+	spin_unlock(&afs_cb_hash_lock);
+
+	/* set the changed flag in the vnode and release the server */
+	spin_lock(&vnode->lock);
+	/* the promise is being given up, so its expiry timer is cancelled */
+	afs_kafstimod_del_timer(&vnode->cb_timeout);
+
+	server = xchg(&vnode->cb_server,NULL);
+	if (server) {
+		vnode->flags |= AFS_VNODE_CHANGED;
+		/* leave the server's list of promises */
+		spin_lock(&server->cb_lock);
+		list_del_init(&vnode->cb_link);
+		spin_unlock(&server->cb_lock);
+	}
+
+	spin_unlock(&vnode->lock);
+	/* the server is only contacted if one was recorded; otherwise 0 is returned */
+	ret = 0;
+	if (server) {
+		ret = afs_rxfs_give_up_callback(server,vnode);
+		afs_put_server(server);
+	}
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_vnode_give_up_callback() */
diff --git a/fs/afs/vnode.h b/fs/afs/vnode.h
new file mode 100644
index 000000000000..ec2c412c4214
--- /dev/null
+++ b/fs/afs/vnode.h
@@ -0,0 +1,88 @@
+/* vnode.h: AFS vnode record
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_VNODE_H
+#define _LINUX_AFS_VNODE_H
+
+#include <linux/fs.h>
+#include <linux/version.h>
+#include "server.h"
+#include "kafstimod.h"
+
+#ifdef __KERNEL__
+
+struct afs_rxfs_fetch_descriptor;
+
+/*****************************************************************************/
+/*
+ * AFS inode private data
+ */
+struct afs_vnode
+{
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	struct inode		vfs_inode;	/* the VFS's inode record */
+#else
+	struct inode		*inode;		/* the VFS's inode */
+#endif
+
+	afs_volume_t		*volume;	/* volume on which vnode resides */
+	afs_fid_t		fid;		/* the file identifier for this inode */
+	afs_file_status_t	status;		/* AFS status info for this file */
+	unsigned		nix;		/* vnode index in cache */
+	/* coordination of status fetches between concurrent users of the vnode */
+	wait_queue_head_t	update_waitq;	/* status fetch waitqueue */
+	unsigned		update_cnt;	/* number of outstanding ops that will update the
+						 * status */
+	spinlock_t		lock;		/* waitqueue/flags lock */
+	unsigned		flags;		/* AFS_VNODE_* bits defined below */
+#define AFS_VNODE_CHANGED	0x00000001	/* set if vnode reported changed by callback */
+#define AFS_VNODE_DELETED	0x00000002	/* set if vnode deleted on server */
+#define AFS_VNODE_MOUNTPOINT	0x00000004	/* set if vnode is a mountpoint symlink */
+
+	/* outstanding callback notification on this file */
+	afs_server_t		*cb_server;	/* server that made the current promise */
+	struct list_head	cb_link;	/* link in server's promises list */
+	struct list_head	cb_hash_link;	/* link in master callback hash */
+	afs_timer_t		cb_timeout;	/* timeout on promise */
+	unsigned		cb_version;	/* callback version */
+	unsigned		cb_expiry;	/* callback expiry time */
+	afs_callback_type_t	cb_type;	/* type of callback */
+};
+
+static inline afs_vnode_t *AFS_FS_I(struct inode *inode)
+{
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	return list_entry(inode,afs_vnode_t,vfs_inode);	/* inode is embedded in the vnode */
+#else
+	return inode->u.generic_ip;	/* vnode is reached through the inode's generic pointer */
+#endif
+}
+
+static inline struct inode *AFS_VNODE_TO_I(afs_vnode_t *vnode)
+{
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	return &vnode->vfs_inode;	/* the inode embedded in the vnode */
+#else
+	return vnode->inode;		/* the inode the vnode points to */
+#endif
+}
+
+extern int afs_vnode_fetch_status(afs_vnode_t *vnode);
+
+extern int afs_vnode_fetch_data(afs_vnode_t *vnode, struct afs_rxfs_fetch_descriptor *desc);
+
+extern int afs_vnode_give_up_callback(afs_vnode_t *vnode);
+
+extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_AFS_VNODE_H */
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
new file mode 100644
index 000000000000..198c355c715b
--- /dev/null
+++ b/fs/afs/volume.c
@@ -0,0 +1,430 @@
+/* volume.c: AFS volume management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "cell.h"
+#include "cmservice.h"
+#include "fsclient.h"
+#include "vlclient.h"
+#include "internal.h"
+
+const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };	/* names for debugging output, indexed by volume type */
+
+/*****************************************************************************/
+/*
+ * lookup a volume by name
+ * - this can be one of the following:
+ *	"%[cell:]volume[.]"		R/W volume
+ *	"#[cell:]volume[.]"		R/O or R/W volume (rwparent=0), or R/W (rwparent=1) volume
+ *	"%[cell:]volume.readonly"	R/O volume
+ *	"#[cell:]volume.readonly"	R/O volume
+ *	"%[cell:]volume.backup"		Backup volume
+ *	"#[cell:]volume.backup"		Backup volume
+ *
+ * The cell name is optional, and defaults to the current cell.
+ *
+ * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin Guide
+ * - Rule 1: Explicit type suffix forces access of that type or nothing
+ *           (no suffix, then use Rule 2 & 3)
+ * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W if not available
+ * - Rule 3: If parent volume is R/W, then only mount R/W volume unless explicitly told otherwise
+ */
+int afs_volume_lookup(char *name, int rwparent, afs_volume_t **_volume)
+{
+	afs_vlocation_t *vlocation = NULL;
+	afs_voltype_t type;
+	afs_volume_t *volume = NULL;
+	afs_cell_t *cell = NULL;
+	char *cellname, *volname, *suffix;
+	char srvtmask;
+	int force, ret, loop;
+	/* NOTE: name is edited in place; the ':' and type suffix are restored, a stripped '.' is not */
+	_enter(",%s,",name);
+	/* the name must begin with '%' or '#' and have at least one character after that */
+	if (!name || (name[0]!='%' && name[0]!='#') || !name[1]) {
+		printk("kAFS: unparsable volume name\n");
+		return -EINVAL;
+	}
+
+	/* determine the type of volume we're looking for */
+	force = 0;
+	type = AFSVL_ROVOL;
+	/* an R/W parent or a '%' prefix pins the type to R/W; otherwise start from R/O */
+	if (rwparent || name[0]=='%') {
+		type = AFSVL_RWVOL;
+		force = 1;
+	}
+	/* an explicit type suffix overrides that; a lone trailing '.' is stripped from name */
+	suffix = strrchr(name,'.');
+	if (suffix) {
+		if (strcmp(suffix,".readonly")==0) {
+			type = AFSVL_ROVOL;
+			force = 1;
+		}
+		else if (strcmp(suffix,".backup")==0) {
+			type = AFSVL_BACKVOL;
+			force = 1;
+		}
+		else if (suffix[1]==0) {
+			*suffix = 0;
+			suffix = NULL;
+		}
+		else {
+			suffix = NULL;
+		}
+	}
+
+	/* split the cell and volume names */
+	name++;
+	volname = strchr(name,':');
+	if (volname) {
+		*volname++ = 0;
+		cellname = name;
+	}
+	else {
+		volname = name;
+		cellname = NULL;
+	}
+
+	_debug("CELL:%s VOLUME:%s SUFFIX:%s TYPE:%d%s",
+	       cellname,volname,suffix?:"-",type,force?" FORCE":"");
+
+	/* lookup the cell record */
+	ret = afs_cell_lookup(cellname,&cell);
+	if (ret<0)
+		printk("kAFS: unable to lookup cell '%s'\n",cellname?:"");
+	/* restore the ':' that was overwritten to terminate the cell name */
+	if (cellname) volname[-1] = ':';
+	if (ret<0)
+		goto error;
+
+	/* lookup the volume location record */
+	if (suffix) *suffix = 0;
+	ret = afs_vlocation_lookup(cell,volname,&vlocation);
+	if (suffix) *suffix = '.';
+	if (ret<0)
+		goto error;
+
+	/* make the final decision on the type we want */
+	ret = -ENOMEDIUM;
+	if (force && !(vlocation->vldb.vidmask & (1<<type)))
+		goto error;
+	/* work out which volume types the listed servers hold between them */
+	srvtmask = 0;
+	for (loop=0; loop<vlocation->vldb.nservers; loop++)
+		srvtmask |= vlocation->vldb.srvtmask[loop];
+	/* forced: the type must be served; unforced: take R/O if served, else R/W */
+	if (force) {
+		if (!(srvtmask & (1 <<type)))
+			goto error;
+	}
+	else if (srvtmask & AFSC_VOL_STM_RO) {
+		type = AFSVL_ROVOL;
+	}
+	else if (srvtmask & AFSC_VOL_STM_RW) {
+		type = AFSVL_RWVOL;
+	}
+	else {
+		goto error;
+	}
+	/* finding or creating the volume record is done with the cell's vl_sem write-locked */
+	down_write(&cell->vl_sem);
+
+	/* is the volume already active? */
+	if (vlocation->vols[type]) {
+		/* yes - re-use it */
+		volume = vlocation->vols[type];
+		afs_get_volume(volume);
+		goto success;
+	}
+
+	/* create a new volume record */
+	_debug("creating new volume record");
+
+	ret = -ENOMEM;
+	volume = kmalloc(sizeof(afs_volume_t),GFP_KERNEL);
+	if (!volume)
+		goto error_up;
+
+	memset(volume,0,sizeof(afs_volume_t));
+	atomic_set(&volume->usage,1);
+	volume->type = type;
+	volume->type_force = force;
+	volume->cell = cell;
+	volume->vid = vlocation->vldb.vid[type];
+
+	init_rwsem(&volume->server_sem);
+
+	/* look up all the applicable server records */
+	for (loop=0; loop<8; loop++) {
+		if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
+			ret = afs_server_lookup(volume->cell,
+						&vlocation->vldb.servers[loop],
+						&volume->servers[volume->nservers]);
+			if (ret<0)
+				goto error_discard;
+
+			volume->nservers++;
+		}
+	}
+
+	/* attach the cache and volume location */
+#if 0
+	afs_get_cache(cache);		volume->cache = cache;
+#endif
+	afs_get_vlocation(vlocation);	volume->vlocation = vlocation;
+
+	vlocation->vols[type] = volume;
+
+ success:
+	_debug("kAFS selected %s volume %08x",afs_voltypes[volume->type],volume->vid);
+	*_volume = volume;
+	ret = 0;
+
+	/* clean up */
+ error_up:
+	up_write(&cell->vl_sem);
+ error:
+	if (vlocation)	afs_put_vlocation(vlocation);
+	if (cell)	afs_put_cell(cell);
+
+	_leave(" = %d (%p)",ret,volume);
+	return ret;
+	/* creation failed part-way: drop the server refs obtained so far and free the record */
+ error_discard:
+	up_write(&cell->vl_sem);
+
+	for (loop=volume->nservers-1; loop>=0; loop--)
+		if (volume->servers[loop])
+			afs_put_server(volume->servers[loop]);
+
+	kfree(volume);
+	goto error;
+} /* end afs_volume_lookup() */
+
+/*****************************************************************************/
+/*
+ * destroy a volume record
+ */
+void afs_put_volume(afs_volume_t *volume)
+{
+	afs_vlocation_t *vlocation;
+	int loop;
+
+	_enter("%p",volume);
+
+	vlocation = volume->vlocation;
+
+	/* sanity check */
+	if (atomic_read(&volume->usage)<=0)
+		BUG();
+
+	/* to prevent a race, the decrement and the dequeue must be effectively atomic */
+	down_write(&vlocation->cell->vl_sem);
+	/* not the last ref: nothing more to do */
+	if (likely(!atomic_dec_and_test(&volume->usage))) {
+		up_write(&vlocation->cell->vl_sem);
+		_leave("");
+		return;
+	}
+	/* last ref gone: detach from the VL record while vl_sem is still held */
+	vlocation->vols[volume->type] = NULL;
+
+	up_write(&vlocation->cell->vl_sem);
+	/* drop the ref that the volume record held on its VL record */
+	afs_put_vlocation(vlocation);
+
+	/* finish cleaning up the volume */
+#if 0
+	if (volume->cache)	afs_put_cache(volume->cache);
+#endif
+	/* release the server records, last first */
+	for (loop=volume->nservers-1; loop>=0; loop--)
+		if (volume->servers[loop])
+			afs_put_server(volume->servers[loop]);
+
+	kfree(volume);
+
+	_leave(" [destroyed]");
+} /* end afs_put_volume() */
+
+/*****************************************************************************/
+/*
+ * pick a server to use to try accessing this volume
+ * - returns with an elevated usage count on the server chosen
+ */
+int afs_volume_pick_fileserver(afs_volume_t *volume, afs_server_t **_server)
+{
+	afs_server_t *server;
+	int ret, state, loop;
+
+	_enter("%s",volume->vlocation->vldb.name);
+	/* server_sem is read-held while the server list is examined */
+	down_read(&volume->server_sem);
+
+	/* handle the no-server case */
+	if (volume->nservers==0) {
+		ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
+		up_read(&volume->server_sem);
+		_leave(" = %d [no servers]",ret);
+		return ret;
+	}
+	/* failure states rank: ENETUNREACH < EHOSTUNREACH < ECONNREFUSED < any other state */
+	/* basically, just search the list for the first live server and use that */
+	ret = 0;
+	for (loop=0; loop<volume->nservers; loop++) {
+		server = volume->servers[loop];
+		state = server->fs_state;
+
+		switch (state) {
+			/* found an apparently healthy server */
+		case 0:
+			afs_get_server(server);
+			up_read(&volume->server_sem);
+			*_server = server;
+			_leave(" = 0 (picked %08x)",ntohl(server->addr.s_addr));
+			return 0;
+
+		case -ENETUNREACH:
+			if (ret==0)
+				ret = state;
+			break;
+
+		case -EHOSTUNREACH:
+			if (ret==0 || ret==-ENETUNREACH)
+				ret = state;
+			break;
+
+		case -ECONNREFUSED:
+			if (ret==0 || ret==-ENETUNREACH || ret==-EHOSTUNREACH)
+				ret = state;
+			break;
+
+		default:
+		case -EREMOTEIO:
+			if (ret==0 ||
+			    ret==-ENETUNREACH ||
+			    ret==-EHOSTUNREACH ||
+			    ret==-ECONNREFUSED)
+				ret = state;
+			break;
+		}
+	}
+
+	/* no available servers
+	 * - TODO: handle the no active servers case better
+	 */
+	up_read(&volume->server_sem);
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_volume_pick_fileserver() */
+
+/*****************************************************************************/
+/*
+ * release a server after use
+ * - releases the ref on the server struct that was acquired by picking
+ * - records result of using a particular server to access a volume
+ * - return 0 to try again, 1 if okay or to issue error
+ */
+int afs_volume_release_fileserver(afs_volume_t *volume, afs_server_t *server, int result)
+{
+	unsigned loop;
+
+	_enter("%s,%08x,%d",volume->vlocation->vldb.name,ntohl(server->addr.s_addr),result);
+	/* returns 1 if the caller should accept result, 0 if it should pick a server again */
+	switch (result) {
+		/* success */
+	case 0:
+		server->fs_act_jif = jiffies;
+		break;
+
+		/* the fileserver denied all knowledge of the volume */
+	case -ENOMEDIUM:
+		server->fs_act_jif = jiffies;
+		down_write(&volume->server_sem);
+
+		/* first, find where the server is in the active list (if it is) */
+		for (loop=0; loop<volume->nservers; loop++)
+			if (volume->servers[loop]==server)
+				goto present;
+
+		/* no longer there - may have been discarded by another op */
+		goto try_next_server_upw;
+		/* close the gap in the active list and drop the list's own ref on the server */
+	present:
+		volume->nservers--;
+		memmove(&volume->servers[loop],
+			&volume->servers[loop+1],
+			sizeof(volume->servers[loop]) * (volume->nservers - loop)
+			);
+		volume->servers[volume->nservers] = NULL;
+		afs_put_server(server);
+		volume->rjservers++;
+		/* the ref obtained by picking is dropped separately on whichever exit is taken below */
+		if (volume->nservers>0)
+			/* another server might acknowledge its existence */
+			goto try_next_server_upw;
+
+		/* handle the case where all the fileservers have rejected the volume
+		 * - TODO: try asking the fileservers for volume information
+		 * - TODO: contact the VL server again to see if the volume is no longer registered
+		 */
+		up_write(&volume->server_sem);
+		afs_put_server(server);
+		_leave(" [completely rejected]");
+		return 1;
+
+		/* problem reaching the server */
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -ECONNREFUSED:
+	case -ETIMEDOUT:
+	case -EREMOTEIO:
+		/* mark the server as dead
+		 * TODO: vary dead timeout depending on error
+		 */
+		spin_lock(&server->fs_lock);
+		if (!server->fs_state) {
+			server->fs_dead_jif = jiffies + HZ * 10;
+			server->fs_state = result;
+			printk("kAFS: SERVER DEAD state=%d\n",result);
+		}
+		spin_unlock(&server->fs_lock);
+		goto try_next_server;
+
+		/* miscellaneous error */
+	default:
+		server->fs_act_jif = jiffies;	/* then fall through */
+	case -ENOMEM:
+	case -ENONET:
+		break;
+	}
+
+	/* tell the caller to accept the result */
+	afs_put_server(server);
+	_leave("");
+	return 1;
+
+	/* tell the caller to loop around and try the next server */
+ try_next_server_upw:
+	up_write(&volume->server_sem);
+ try_next_server:
+	afs_put_server(server);
+	_leave(" [try next server]");
+	return 0;
+
+} /* end afs_volume_release_fileserver() */
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
new file mode 100644
index 000000000000..1842d983677f
--- /dev/null
+++ b/fs/afs/volume.h
@@ -0,0 +1,92 @@
+/* volume.h: AFS volume management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_VOLUME_H
+#define _LINUX_AFS_VOLUME_H
+
+#include "types.h"
+#include "fsclient.h"
+#include "kafstimod.h"
+#include "kafsasyncd.h"
+#include "cache-layout.h"
+
+#define __packed __attribute__((packed))	/* shorthand for GCC's packed attribute; applied to afs_volume.type in this header */
+
+typedef enum {				/* update state of a volume location record */
+	AFS_VLUPD_SLEEP,		/* sleeping, waiting for the update timer to fire */
+	AFS_VLUPD_PENDING,		/* on the pending queue */
+	AFS_VLUPD_INPROGRESS,		/* update operation in progress */
+	AFS_VLUPD_BUSYSLEEP,		/* sleeping because the server returned EBUSY */
+	
+} __attribute__((packed)) afs_vlocation_upd_t;
+
+/*****************************************************************************/
+/*
+ * AFS volume location record
+ */
+struct afs_vlocation
+{
+	atomic_t		usage;		/* usage count (incremented by afs_get_vlocation()) */
+	struct list_head	link;		/* link in the cell's volume location list */
+	afs_timer_t		timeout;	/* decaching timer */
+	afs_cell_t		*cell;		/* cell to which the volume belongs */
+	struct list_head	caches;		/* backing caches */
+	afsc_vldb_record_t	vldb;		/* volume information DB record */
+	struct afs_volume	*vols[3];	/* volume access record pointers (indexed by volume type) */
+	rwlock_t		lock;		/* access lock */
+	unsigned long		read_jif;	/* time at which last read from vlserver */
+	afs_timer_t		upd_timer;	/* update timer */
+	afs_async_op_t		upd_op;		/* update operation */
+	afs_vlocation_upd_t	upd_state;	/* update state (one of AFS_VLUPD_*) */
+	unsigned short		upd_first_svix;	/* first server index during update */
+	unsigned short		upd_curr_svix;	/* current server index during update */
+	unsigned short		upd_rej_cnt;	/* ENOMEDIUM count during update */
+	unsigned short		upd_busy_cnt;	/* EBUSY count during update */
+	unsigned short		valid;		/* T (true) if valid */
+};
+
+extern int afs_vlocation_lookup(afs_cell_t *cell, const char *name, afs_vlocation_t **_vlocation);
+/* atomically increment the usage count of volume location record V */
+#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
+
+extern void __afs_put_vlocation(afs_vlocation_t *vlocation);
+extern void afs_put_vlocation(afs_vlocation_t *vlocation);
+extern void afs_vlocation_do_timeout(afs_vlocation_t *vlocation);
+
+/*****************************************************************************/
+/*
+ * AFS volume access record
+ */
+struct afs_volume
+{
+	atomic_t		usage;		/* usage count (incremented by afs_get_volume()) */
+	afs_cell_t		*cell;		/* cell to which the volume belongs (unrefd ptr) */
+	afs_vlocation_t		*vlocation;	/* volume location */
+	afs_volid_t		vid;		/* volume ID */
+	afs_voltype_t __packed	type;		/* type of volume */
+	char			type_force;	/* force volume type (suppress R/O -> R/W) */
+	unsigned short		nservers;	/* number of server slots filled in servers[] */
+	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
+	afs_server_t		*servers[8];	/* servers on which volume resides (ordered) */
+	struct rw_semaphore	server_sem;	/* lock for accessing current server */
+};
+
+extern int afs_volume_lookup(char *name, int ro, afs_volume_t **_volume);
+/* atomically increment the usage count of volume access record V */
+#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
+
+extern void afs_put_volume(afs_volume_t *volume);
+
+extern int afs_volume_pick_fileserver(afs_volume_t *volume, afs_server_t **_server);
+/* result is a negative errno-style code; returns 1 = caller should accept the result, 0 = caller should try the next server */
+extern int afs_volume_release_fileserver(afs_volume_t *volume, afs_server_t *server, int result);
+
+#endif /* _LINUX_AFS_VOLUME_H */
-- 
cgit v1.2.3