Merge nuts.ninka.net:/home/davem/src/BK/network-2.5

into nuts.ninka.net:/home/davem/src/BK/net-2.5
author: David S. Miller <davem@nuts.ninka.net> 2002-10-15 07:41:35 -0700
committer: David S. Miller <davem@nuts.ninka.net> 2002-10-15 07:41:35 -0700
commit: 8fbfe7cd5594010a23cb4e81786d1fb8015ffdee (patch)
tree: b5be190f22984395209823ec3cac1c76fc93f67f /fs
parent: e22f7f5fd43205bfd20ea3a7bb4e689cb3f3d278 (diff)
parent: 5a7728c6d3eb83df9d120944cca4cf476dd326a1 (diff)
138 files changed, 15819 insertions, 2206 deletions
diff --git a/fs/Config.help b/fs/Config.help
index 44622847c4df..76ffd7584add 100644
--- a/fs/Config.help
+++ b/fs/Config.help
@@ -535,6 +535,13 @@ CONFIG_NFS_DIRECTIO
   causes open() to return EINVAL if a file residing in NFS is
   opened with the O_DIRECT flag.
 
+CONFIG_NFS_V4
+  Say Y here if you want your NFS client to be able to speak the newer
+  version 4 of the NFS protocol.  This feature is experimental, and
+  should only be used if you are interested in helping to test NFSv4.
+
+  If unsure, say N.
+
 CONFIG_ROOT_NFS
   If you want your Linux box to mount its whole root file system (the
   one containing the directory /) from some other computer over the
@@ -1137,3 +1144,11 @@ CONFIG_XFS_RT
 
   If unsure, say N.
 
+CONFIG_AFS_FS
+  If you say Y here, you will get an experimental Andrew File System
+  driver. It currently only supports unsecured read-only AFS access.
+
+  See Documentation/filesystems/afs.txt for more information.
+
+  If unsure, say N.
+
diff --git a/fs/Config.in b/fs/Config.in
index e6eb844338ec..0464a17a8dbd 100644
--- a/fs/Config.in
+++ b/fs/Config.in
@@ -117,6 +117,7 @@ if [ "$CONFIG_NET" = "y" ]; then
    dep_tristate 'InterMezzo file system support (replicating fs) (EXPERIMENTAL)' CONFIG_INTERMEZZO_FS $CONFIG_INET $CONFIG_EXPERIMENTAL
    dep_tristate 'NFS file system support' CONFIG_NFS_FS $CONFIG_INET
    dep_mbool '  Provide NFSv3 client support' CONFIG_NFS_V3 $CONFIG_NFS_FS
+   dep_mbool '  Provide NFSv4 client support (EXPERIMENTAL)' CONFIG_NFS_V4 $CONFIG_NFS_FS $CONFIG_EXPERIMENTAL
    dep_bool '  Root file system on NFS' CONFIG_ROOT_NFS $CONFIG_NFS_FS $CONFIG_IP_PNP
 
    dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET
@@ -157,6 +158,16 @@ if [ "$CONFIG_NET" = "y" ]; then
       # for fs/nls/Config.in
       define_bool CONFIG_NCPFS_NLS n
    fi
+
+   dep_tristate 'Andrew File System support (AFS) (Experimental)' CONFIG_AFS_FS $CONFIG_INET $CONFIG_EXPERIMENTAL
+   if [ "$CONFIG_AFS_FS" = "y" ]; then
+      define_tristate CONFIG_RXRPC y
+   else
+      if [ "$CONFIG_AFS_FS" = "m" ]; then
+	 define_tristate CONFIG_RXRPC m
+      fi
+   fi
+
    endmenu
 
 else
diff --git a/fs/Makefile b/fs/Makefile
index d902bdd8bda3..c28d57ab55a9 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -6,7 +6,7 @@
 # 
 
 export-objs :=	open.o dcache.o buffer.o bio.o inode.o dquot.o mpage.o aio.o \
-                fcntl.o read_write.o
+                fcntl.o read_write.o dcookies.o
 
 obj-y :=	open.o read_write.o devices.o file_table.o buffer.o \
 		bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \
@@ -40,6 +40,8 @@ obj-y				+= partitions/
 obj-y				+= driverfs/
 obj-y				+= devpts/
 
+obj-$(CONFIG_PROFILING)		+= dcookies.o
+ 
 # Do not add any filesystems before this line
 obj-$(CONFIG_EXT3_FS)		+= ext3/ # Before ext2 so root fs can be ext3
 obj-$(CONFIG_JBD)		+= jbd/
@@ -84,5 +86,6 @@ obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
 obj-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs/
 obj-$(CONFIG_JFS_FS)		+= jfs/
 obj-$(CONFIG_XFS_FS)		+= xfs/
+obj-$(CONFIG_AFS_FS)		+= afs/
 
 include $(TOPDIR)/Rules.make
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
new file mode 100644
index 000000000000..753cf8c5b4eb
--- /dev/null
+++ b/fs/afs/Makefile
@@ -0,0 +1,36 @@
+#
+# Makefile for Red Hat Linux AFS client.
+#
+
+kafs-objs := \
+	callback.o \
+	cell.o \
+	cmservice.o \
+	dir.o \
+	file.o \
+	fsclient.o \
+	inode.o \
+	kafsasyncd.o \
+	kafstimod.o \
+	main.o \
+	misc.o \
+	mntpt.o \
+	proc.o \
+	server.o \
+	super.o \
+	vlclient.o \
+	vlocation.o \
+	vnode.o \
+	volume.o
+
+#	cache.o
+
+obj-m  := kafs.o
+
+# superfluous for 2.5, but needed for 2.4..
+ifeq "$(VERSION).$(PATCHLEVEL)" "2.4"
+kafs.o: $(kafs-objs)
+	$(LD) -r -o kafs.o $(kafs-objs)
+endif
+
+include $(TOPDIR)/Rules.make
diff --git a/fs/afs/cache-layout.h b/fs/afs/cache-layout.h
new file mode 100644
index 000000000000..e71afd719a3f
--- /dev/null
+++ b/fs/afs/cache-layout.h
@@ -0,0 +1,224 @@
+/* cache-layout.h: AFS cache layout
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * The cache is stored on a block device and is laid out as:
+ *
+ *  0	+------------------------------------------------
+ *	|
+ *	|  SuperBlock
+ *	|
+ *  1	+------------------------------------------------
+ *	|
+ *	|  file-meta-data File: Data block #0
+ *	|  - file-meta-data file (volix #0 file #0) : Meta-data block
+ *	|    - contains direct pointers to first 64 file data blocks
+ *	|  - Cached cell catalogue file (volix #0 file #1) file: Meta-data block
+ *	|  - Cached volume location catalogue file (volix #0 file #2): Meta-data block
+ *	|  - Vnode catalogue hash bucket #n file: Meta-data block
+ *	|
+ *  2	+------------------------------------------------
+ *	|
+ *	|  Bitmap Block Allocation Bitmap
+ *	|  - 1 bit per block in the bitmap block
+ *      |  - bit 0 of dword 0 refers to the bitmap block 0
+ *	|    - set if the bitmap block is full
+ *      |  - 32768 bits per block, requiring 4 blocks for a 16Tb cache
+ *	|  - bitmap bitmap blocks are cleared initially
+ *	|  - not present if <4 bitmap blocks
+ *	|
+ *	+------------------------------------------------
+ *	|
+ *	|  File Block Allocation Bitmap
+ *	|  - 1 bit per block in the cache
+ *      |  - bit 0 of dword 0 refers to the first block of the data cache
+ *	|    - set if block is allocated
+ *      |  - 32768 bits per block, requiring 131072 blocks for a 16Tb cache
+ *	|  - bitmap blocks are cleared lazily (sb->bix_bitmap_unready)
+ *	|
+ *	+------------------------------------------------
+ *	|
+ *	|  Data Cache
+ *	|
+ *  End	+------------------------------------------------
+ *
+ * Blocks are indexed by an unsigned 32-bit word, meaning that the cache can hold up to 2^32 pages,
+ * or 16Tb in total.
+ *
+ * Credentials will be cached in memory, since they are subject to change without notice, and are
+ * difficult to derive manually, being constructed from the following information:
+ * - per vnode user ID and mode mask
+ * - parent directory ACL
+ * - directory ACL (dirs only)
+ * - group lists from ptserver
+ */
+
+#ifndef _LINUX_AFS_CACHE_LAYOUT_H
+#define _LINUX_AFS_CACHE_LAYOUT_H
+
+#include "types.h"
+
+typedef u32 afsc_blockix_t;
+typedef u32 afsc_cellix_t;
+
+/* Cached volume index
+ * - afsc_volix_t/4 is the index into the volume cache
+ * - afsc_volix_t%4 is 0 for R/W, 1 for R/O and 2 for Bak (3 is not used)
+ * - afsc_volix_t==0-3 refers to a "virtual" volume that stores meta-data about the cache
+ */
+typedef struct {
+	u32 index;
+} afsc_volix_t;
+
+#define AFSC_VNCAT_HASH_NBUCKETS	128
+
+/* special meta file IDs (all cell 0 vol 0) */
+enum afsc_meta_fids {
+	AFSC_META_FID_METADATA		= 0,	/* the file-meta-data file itself */
+	AFSC_META_FID_CELL_CATALOGUE	= 1,	/* cached cell catalogue file */
+	AFSC_META_FID_VLDB_CATALOGUE	= 2,	/* cached volume location catalogue file */
+	AFSC_META_FID_VNODE_CATALOGUE0	= 3,	/* vnode catalogue hash bucket #0 file */
+	AFSC_META_FID__COUNT		= AFSC_VNCAT_HASH_NBUCKETS + 3	/* 3 fixed files + one per hash bucket */
+};
+
+/*****************************************************************************/
+/*
+ * cache superblock block layout
+ * - the blockdev is prepared for initialisation by 'echo "kafsuninit" >/dev/hdaXX' before mounting
+ * - when initialised, the magic number is changed to "kafs-cache"
+ */
+struct afsc_super_block
+{
+	char			magic[10];	/* magic number (both strings below are 10 chars: no NUL stored) */
+#define AFSC_SUPER_MAGIC "kafs-cache"
+#define AFSC_SUPER_MAGIC_NEEDS_INIT "kafsuninit"
+#define AFSC_SUPER_MAGIC_SIZE 10
+
+	unsigned short		endian;		/* 0x1234 stored CPU-normal order */
+#define AFSC_SUPER_ENDIAN 0x1234
+
+	unsigned		version;	/* format version */
+#define AFSC_SUPER_VERSION 1
+
+	/* layout - all block indices below count blocks from the start of the cache device */
+	unsigned		bsize;			/* cache block size */
+	afsc_blockix_t		bix_bitmap_fullmap;	/* block ix of bitmap full bitmap */
+	afsc_blockix_t		bix_bitmap;		/* block ix of alloc bitmap */
+	afsc_blockix_t		bix_bitmap_unready;	/* block ix of unready area of bitmap */
+	afsc_blockix_t		bix_cache;		/* block ix of data cache */
+	afsc_blockix_t		bix_end;		/* block ix of end of cache */
+};
+
+/*****************************************************************************/
+/*
+ * vnode (inode) metadata cache record
+ * - padded out to 512 bytes and stored eight to a page
+ * - only the data version is necessary
+ *   - disconnected operation is not supported
+ *   - afs_iget() contacts the server to get the meta-data _anyway_ when an inode is first brought
+ *     into memory
+ * - at least 64 direct block pointers will be available (a directory is max 256Kb)
+ * - any block pointer which is 0 indicates an uncached page
+ */
+struct afsc_vnode_meta
+{
+	/* file ID */
+	afsc_volix_t		volume_ix;	/* volume catalogue index */
+	unsigned		vnode;		/* vnode number */
+	unsigned		unique;		/* FID unique */
+	unsigned		size;		/* size of file */
+	time_t			mtime;		/* last modification time */
+
+	/* file status */
+	afs_dataversion_t	version;	/* current data version */
+
+	/* file contents - a block index of 0 means the page is not cached */
+	afsc_blockix_t		dbl_indirect;	/* double indirect block index */
+	afsc_blockix_t		indirect;	/* single indirect block 0 index */
+	afsc_blockix_t		direct[0];	/* direct block indices: AFSC_VNODE_META_DIRECT of them fill the rest of the record */
+};
+
+#define AFSC_VNODE_META_RECSIZE	512	/* record size */
+
+#define AFSC_VNODE_META_DIRECT	\
+	((AFSC_VNODE_META_RECSIZE-sizeof(struct afsc_vnode_meta))/sizeof(afsc_blockix_t))
+
+#define AFSC_VNODE_META_PER_PAGE	(PAGE_SIZE / AFSC_VNODE_META_RECSIZE)
+
+/*****************************************************************************/
+/*
+ * entry in the cached cell catalogue
+ */
+struct afsc_cell_record
+{
+	char			name[64];	/* cell name (padded with NULs) */
+	struct in_addr		servers[16];	/* cached cell servers */
+};
+
+/*****************************************************************************/
+/*
+ * entry in the cached volume location catalogue
+ * - indexed by afsc_volix_t/4
+ */
+struct afsc_vldb_record
+{
+	char			name[64];	/* volume name (padded with NULs) */
+	afs_volid_t		vid[3];		/* volume IDs for R/W, R/O and Bak volumes */
+	unsigned char		vidmask;	/* voltype mask for vid[] */
+	unsigned char		_pad[1];
+	unsigned short		nservers;	/* number of entries used in servers[] */
+	struct in_addr		servers[8];	/* fileserver addresses */
+	unsigned char		srvtmask[8];	/* voltype masks for servers[] */
+#define AFSC_VOL_STM_RW	0x01 /* server holds a R/W version of the volume */
+#define AFSC_VOL_STM_RO	0x02 /* server holds a R/O version of the volume */
+#define AFSC_VOL_STM_BAK	0x04 /* server holds a backup version of the volume */
+
+	afsc_cellix_t		cell_ix;	/* cell catalogue index (MAX_UINT if unused) */
+	time_t			ctime;		/* time at which cached */
+};
+
+/*****************************************************************************/
+/*
+ * vnode catalogue entry
+ * - must be 2^x size so that do_generic_file_read doesn't present them split across pages
+ */
+struct afsc_vnode_catalogue
+{
+	afsc_volix_t		volume_ix;	/* volume catalogue index */
+	afs_vnodeid_t		vnode;		/* vnode ID */
+	u32			meta_ix;	/* metadata file index */
+	u32			atime;		/* last time entry accessed */
+} __attribute__((packed));
+
+#define AFSC_VNODE_CATALOGUE_PER_BLOCK ((size_t)(PAGE_SIZE/sizeof(struct afsc_vnode_catalogue)))
+
+/*****************************************************************************/
+/*
+ * vnode data "page directory" block
+ * - first 1024 pages don't map through here
+ * - PAGE_SIZE in size
+ */
+struct afsc_indirect_block
+{
+	afsc_blockix_t		pt_bix[1024];	/* "page table" block indices */
+};
+
+/*****************************************************************************/
+/*
+ * vnode data "page table" block
+ * - PAGE_SIZE in size
+ */
+struct afsc_dbl_indirect_block
+{
+	afsc_blockix_t		page_bix[1024];	/* "page" block indices */
+};
+
+
+#endif /* _LINUX_AFS_CACHE_LAYOUT_H */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
new file mode 100644
index 000000000000..8d030bd67aa0
--- /dev/null
+++ b/fs/afs/callback.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
+ *          David Howells <dhowells@redhat.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include "server.h"
+#include "vnode.h"
+#include "internal.h"
+
+/*****************************************************************************/
+/*
+ * allow the fileserver to request callback state (re-)initialisation
+ */
+int SRXAFSCM_InitCallBackState(afs_server_t *server)
+{
+	struct list_head callbacks;	/* private head that takes over the server's promise list */
+
+	_enter("%p",server);
+
+	INIT_LIST_HEAD(&callbacks);
+
+	/* transfer the callback list from the server to a temp holding area */
+	spin_lock(&server->cb_lock);
+
+	list_add(&callbacks,&server->cb_promises);	/* link the temp head in next to the server's head */
+	list_del_init(&server->cb_promises);		/* then unlink the server's head, leaving it empty */
+
+	/* munch our way through the list, grabbing the inode, dropping all the locks and regetting
+	 * them in the right order
+	 */
+	while (!list_empty(&callbacks)) {
+		struct inode *inode;
+		afs_vnode_t *vnode;
+
+		vnode = list_entry(callbacks.next,afs_vnode_t,cb_link);
+		list_del_init(&vnode->cb_link);
+		/* try and grab the inode - may fail, in which case the vnode is simply skipped
+		 * (NOTE(review): it is already dequeued and keeps its cb_server - confirm intended) */
+		inode = igrab(AFS_VNODE_TO_I(vnode));
+		if (inode) {
+			int release = 0;	/* set when afs_put_server() must be called below */
+			/* cb_lock is dropped before vnode->lock is taken and retaken at the bottom */
+			spin_unlock(&server->cb_lock);
+			spin_lock(&vnode->lock);
+
+			if (vnode->cb_server==server) {	/* re-check now that vnode->lock is held */
+				vnode->cb_server = NULL;
+				afs_kafstimod_del_timer(&vnode->cb_timeout);
+				spin_lock(&afs_cb_hash_lock);
+				list_del_init(&vnode->cb_hash_link);
+				spin_unlock(&afs_cb_hash_lock);
+				release = 1;
+			}
+
+			spin_unlock(&vnode->lock);
+
+			iput(inode);
+			if (release) afs_put_server(server);
+
+			spin_lock(&server->cb_lock);
+		}
+	}
+
+	spin_unlock(&server->cb_lock);
+
+	_leave(" = 0");
+	return 0;
+} /* end SRXAFSCM_InitCallBackState() */
+
+/*****************************************************************************/
+/*
+ * allow the fileserver to break callback promises
+ * - each of the "count" records in callbacks[] is looked up by FID in the callback hash and
+ *   acted on only if its promise is held by "server"; other records are skipped silently
+ * - always returns 0
+ */
+int SRXAFSCM_CallBack(afs_server_t *server, size_t count, afs_callback_t callbacks[])
+{
+	struct list_head *_p;
+
+	_enter("%p,%u,",server,(unsigned)count); /* count is a size_t: convert it to match %u */
+
+	for (; count>0; callbacks++, count--) {
+		struct inode *inode = NULL;
+		afs_vnode_t *vnode = NULL;
+		int valid = 0;	/* set once a promise held by this server has been cancelled */
+
+		_debug("- Fid { vl=%08x n=%u u=%u }  CB { v=%u x=%u t=%u }",
+		       callbacks->fid.vid,
+		       callbacks->fid.vnode,
+		       callbacks->fid.unique,
+		       callbacks->version,
+		       callbacks->expiry,
+		       callbacks->type
+		       );
+
+		/* find the inode for this fid */
+		spin_lock(&afs_cb_hash_lock);
+		list_for_each(_p,&afs_cb_hash(server,&callbacks->fid)) {
+			vnode = list_entry(_p,afs_vnode_t,cb_hash_link);
+
+			if (memcmp(&vnode->fid,&callbacks->fid,sizeof(afs_fid_t))!=0)
+				continue;
+
+			/* right vnode, but is it same server? */
+			if (vnode->cb_server!=server)
+				break; /* no */
+
+			/* try and nail the inode down */
+			inode = igrab(AFS_VNODE_TO_I(vnode));
+			break;
+		}
+		spin_unlock(&afs_cb_hash_lock);
+
+		if (inode) {
+			/* we've found the record for this vnode; vnode is only meaningful in here */
+			spin_lock(&vnode->lock);
+			if (vnode->cb_server==server) {
+				/* the callback _is_ on the calling server */
+				vnode->cb_server = NULL;
+				valid = 1;
+
+				afs_kafstimod_del_timer(&vnode->cb_timeout);
+				vnode->flags |= AFS_VNODE_CHANGED;
+				spin_lock(&server->cb_lock);
+				list_del_init(&vnode->cb_link);
+				spin_unlock(&server->cb_lock);
+
+				spin_lock(&afs_cb_hash_lock);
+				list_del_init(&vnode->cb_hash_link);
+				spin_unlock(&afs_cb_hash_lock);
+			}
+			spin_unlock(&vnode->lock);
+
+			if (valid) {
+				invalidate_inode_pages(inode->i_mapping);
+				afs_put_server(server);
+			}
+			iput(inode);
+		}
+	}
+
+	_leave(" = 0");
+	return 0;
+} /* end SRXAFSCM_CallBack() */
+
+/*****************************************************************************/
+/*
+ * allow the fileserver to see if the cache manager is still alive
+ */
+int SRXAFSCM_Probe(afs_server_t *server)
+{
+	_debug("SRXAFSCM_Probe(%p)\n",server);	/* the probe is only logged... */
+	return 0;				/* ...and always answered with success */
+} /* end SRXAFSCM_Probe() */
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
new file mode 100644
index 000000000000..f7f00a2bec9e
--- /dev/null
+++ b/fs/afs/cell.c
@@ -0,0 +1,452 @@
+/* cell.c: AFS cell and server record management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/connection.h>
+#include "volume.h"
+#include "cell.h"
+#include "server.h"
+#include "transport.h"
+#include "vlclient.h"
+#include "kafstimod.h"
+#include "super.h"
+#include "internal.h"
+
+DECLARE_RWSEM(afs_proc_cells_sem);
+LIST_HEAD(afs_proc_cells);
+
+static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells);
+static rwlock_t afs_cells_lock = RW_LOCK_UNLOCKED;
+static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
+static afs_cell_t *afs_cell_root;
+
+static char *rootcell;
+
+MODULE_PARM(rootcell,"s");
+MODULE_PARM_DESC(rootcell,"root AFS cell name and VL server IP addr list");
+
+/*****************************************************************************/
+/*
+ * create a cell record and add it to the cell lists
+ * - "name" is the name of the cell (BUG()s if NULL)
+ * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format; it is modified
+ *   in place (each ':' is overwritten with a NUL) and at most 16 addresses are used
+ * - returns 0 with *_cell set, or -ENOMEM, -EINVAL (bad address) or the error from
+ *   afs_proc_cell_setup(); on failure the record is freed and *_cell is not written
+ */
+int afs_cell_create(const char *name, char *vllist, afs_cell_t **_cell)
+{
+	afs_cell_t *cell;
+	char *next;
+	int ret;
+
+	_enter("%s",name);
+
+	if (!name) BUG(); /* TODO: want to look up "this cell" in the cache */
+
+	/* allocate before taking afs_cells_sem so the -ENOMEM return can't leave it held */
+	cell = kmalloc(sizeof(afs_cell_t) + strlen(name) + 1,GFP_KERNEL);
+	if (!cell) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	down_write(&afs_cells_sem);	/* add/remove serialisation; released on every exit below */
+	memset(cell,0,sizeof(afs_cell_t));
+	atomic_set(&cell->usage,0);
+
+	INIT_LIST_HEAD(&cell->link);
+	INIT_LIST_HEAD(&cell->caches);
+
+	rwlock_init(&cell->sv_lock);
+	INIT_LIST_HEAD(&cell->sv_list);
+	INIT_LIST_HEAD(&cell->sv_graveyard);
+	spin_lock_init(&cell->sv_gylock);
+
+	init_rwsem(&cell->vl_sem);
+	INIT_LIST_HEAD(&cell->vl_list);
+	INIT_LIST_HEAD(&cell->vl_graveyard);
+	spin_lock_init(&cell->vl_gylock);
+
+	strcpy(cell->name,name);	/* fits: the kmalloc above reserved strlen(name)+1 bytes */
+
+	/* fill in the VL server list from the rest of the string */
+	ret = -EINVAL;
+	do {
+		unsigned a, b, c, d;
+
+		next = strchr(vllist,':');
+		if (next) *next++ = 0;
+
+		if (sscanf(vllist,"%u.%u.%u.%u",&a,&b,&c,&d)!=4)
+			goto badaddr;
+		if (a>255 || b>255 || c>255 || d>255)
+			goto badaddr;
+
+		cell->vl_addrs[cell->vl_naddrs++].s_addr =
+			htonl((a<<24)|(b<<16)|(c<<8)|d);
+
+		if (cell->vl_naddrs>=16)
+			break;	/* vl_addrs[] is full; any further addresses are ignored */
+	} while(vllist=next, vllist);
+
+	/* add a proc dir for this cell */
+	ret = afs_proc_cell_setup(cell);
+	if (ret<0)
+		goto error;
+
+	/* add to the cell lists */
+	write_lock(&afs_cells_lock);
+	list_add_tail(&cell->link,&afs_cells);
+	write_unlock(&afs_cells_lock);
+
+	down_write(&afs_proc_cells_sem);
+	list_add_tail(&cell->proc_link,&afs_proc_cells);
+	up_write(&afs_proc_cells_sem);
+
+	*_cell = cell;
+	up_write(&afs_cells_sem);
+
+	_leave(" = 0 (%p)",cell);
+	return 0;
+
+ badaddr:
+	printk("kAFS: bad VL server IP address: '%s'\n",vllist);
+ error:
+	up_write(&afs_cells_sem);
+	kfree(cell);	/* the record allocated above - not the global afs_cell_root */
+	return ret;
+} /* end afs_cell_create() */
+
+/*****************************************************************************/
+/*
+ * initialise the cell database from module parameters
+ */
+int afs_cell_init(void)
+{
+	char *vllist;
+	int err;
+
+	_enter("");
+
+	/* the "rootcell" module parameter must read "<cellname>:<addr>[:<addr>...]" */
+	if (rootcell == NULL) {
+		printk("kAFS: no root cell specified\n");
+		return -EINVAL;
+	}
+	vllist = strchr(rootcell,':');
+	if (vllist == NULL) {
+		printk("kAFS: no VL server IP addresses specified\n");
+		return -EINVAL;
+	}
+
+	/* cut the parameter in two at the first colon: cell name before, address list after */
+	*vllist = 0;
+	vllist++;
+	err = afs_cell_create(rootcell,vllist,&afs_cell_root);
+	if (!err)
+		afs_get_cell(afs_cell_root);	/* reference dropped again by afs_cell_purge() */
+
+	_leave(" = %d",err);
+	return err;
+} /* end afs_cell_init() */
+
+/*****************************************************************************/
+/*
+ * lookup a cell record
+ */
+int afs_cell_lookup(const char *name, afs_cell_t **_cell)
+{
+	struct list_head *pos;
+	afs_cell_t *found = NULL;
+	int ret;
+
+	_enter("\"%s\",",name?name:"*thiscell*");
+
+	if (!name) {
+		found = afs_cell_root;	/* unnamed: use the root cell (may be unset) */
+	}
+	else {
+		/* named: scan the cell record list for an exact name match */
+		read_lock(&afs_cells_lock);
+		list_for_each(pos,&afs_cells) {
+			afs_cell_t *candidate = list_entry(pos,afs_cell_t,link);
+			if (strcmp(candidate->name,name)!=0)
+				continue;
+			found = candidate;
+			break;
+		}
+		read_unlock(&afs_cells_lock);
+	}
+
+	if (found)
+		afs_get_cell(found);	/* a reference is taken for the caller */
+
+	ret = found ? 0 : -ENOENT;
+	*_cell = found;
+	_leave(" = %d (%p)",ret,found);
+	return ret;
+} /* end afs_cell_lookup() */
+
+/*****************************************************************************/
+/*
+ * try and get a cell record
+ */
+afs_cell_t *afs_get_cell_maybe(afs_cell_t **_cell)
+{
+	afs_cell_t *cell;
+
+	/* sample *_cell under afs_cells_lock; a reference is only handed out while the record
+	 * is still linked through cell->link, otherwise NULL is returned */
+	write_lock(&afs_cells_lock);
+	cell = *_cell;
+	if (!cell || list_empty(&cell->link))
+		cell = NULL;
+	else
+		atomic_inc(&cell->usage);
+	write_unlock(&afs_cells_lock);
+
+	return cell;
+} /* end afs_get_cell_maybe() */
+
+/*****************************************************************************/
+/*
+ * release a reference on a cell record (the record itself is not freed here)
+ */
+void afs_put_cell(afs_cell_t *cell)
+{
+	_enter("%p{%d,%s}",cell,atomic_read(&cell->usage),cell->name);
+
+	/* sanity check */
+	if (atomic_read(&cell->usage)<=0)
+		BUG();
+
+	/* to prevent a race, the decrement and the dequeue must be effectively atomic */
+	write_lock(&afs_cells_lock);
+
+	if (likely(!atomic_dec_and_test(&cell->usage))) {
+		write_unlock(&afs_cells_lock);
+		_leave("");
+		return;
+	}
+
+	write_unlock(&afs_cells_lock);
+	/* last ref gone: nothing is dequeued or freed here, the lists are only checked for empty */
+	if (!list_empty(&cell->sv_list))	BUG();
+	if (!list_empty(&cell->sv_graveyard))	BUG();
+	if (!list_empty(&cell->vl_list))	BUG();
+	if (!list_empty(&cell->vl_graveyard))	BUG();
+
+	_leave(" [unused]");
+} /* end afs_put_cell() */
+
+/*****************************************************************************/
+/*
+ * destroy a cell record: unlink it from the cell and /proc lists and free it (usage must be 0)
+ */
+static void afs_cell_destroy(afs_cell_t *cell)
+{
+	_enter("%p{%d,%s}",cell,atomic_read(&cell->usage),cell->name);
+
+	/* to prevent a race, the decrement and the dequeue must be effectively atomic */
+	write_lock(&afs_cells_lock);
+
+	/* sanity check */
+	if (atomic_read(&cell->usage)!=0)
+		BUG();
+
+	list_del_init(&cell->link);
+
+	write_unlock(&afs_cells_lock);
+	/* afs_cells_sem serialises the /proc teardown against afs_cell_create() */
+	down_write(&afs_cells_sem);
+
+	afs_proc_cell_remove(cell);
+
+	down_write(&afs_proc_cells_sem);
+	list_del_init(&cell->proc_link);
+	up_write(&afs_proc_cells_sem);
+
+	up_write(&afs_cells_sem);
+	/* nothing may still be queued on the cell when it is freed */
+	if (!list_empty(&cell->sv_list))	BUG();
+	if (!list_empty(&cell->sv_graveyard))	BUG();
+	if (!list_empty(&cell->vl_list))	BUG();
+	if (!list_empty(&cell->vl_graveyard))	BUG();
+
+	/* finish cleaning up the cell */
+	kfree(cell);
+
+	_leave(" [destroyed]");
+} /* end afs_cell_destroy() */
+
+/*****************************************************************************/
+/*
+ * lookup the server record corresponding to an Rx RPC peer (matched on peer->addr)
+ */
+int afs_server_find_by_peer(const struct rxrpc_peer *peer, afs_server_t **_server)
+{
+	struct list_head *_pc, *_ps;
+	afs_server_t *server;
+	afs_cell_t *cell;
+
+	_enter("%p{a=%08x},",peer,ntohl(peer->addr.s_addr));
+
+	/* search the cell list */
+	read_lock(&afs_cells_lock);
+
+	list_for_each(_pc,&afs_cells) {
+		cell = list_entry(_pc,afs_cell_t,link);
+
+		_debug("? cell %s",cell->name);
+		/* write-locked because a graveyard hit below moves the record onto sv_list */
+		write_lock(&cell->sv_lock);
+
+		/* check the active list */
+		list_for_each(_ps,&cell->sv_list) {
+			server = list_entry(_ps,afs_server_t,link);
+
+			_debug("?? server %08x",ntohl(server->addr.s_addr));
+
+			if (memcmp(&server->addr,&peer->addr,sizeof(struct in_addr))==0)
+				goto found_server;
+		}
+
+		/* check the inactive list */
+		spin_lock(&cell->sv_gylock);
+		list_for_each(_ps,&cell->sv_graveyard) {
+			server = list_entry(_ps,afs_server_t,link);
+
+			_debug("?? dead server %08x",ntohl(server->addr.s_addr));
+
+			if (memcmp(&server->addr,&peer->addr,sizeof(struct in_addr))==0)
+				goto found_dead_server;
+		}
+		spin_unlock(&cell->sv_gylock);
+
+		write_unlock(&cell->sv_lock);
+	}
+	read_unlock(&afs_cells_lock);
+	/* no cell has a server record with this address; *_server is left unwritten */
+	_leave(" = -ENOENT");
+	return -ENOENT;
+
+	/* we found it in the graveyard - resurrect it (all three locks are still held here) */
+ found_dead_server:
+	list_del(&server->link);
+	list_add_tail(&server->link,&cell->sv_list);
+	afs_get_server(server);
+	afs_kafstimod_del_timer(&server->timeout);
+	spin_unlock(&cell->sv_gylock);
+	goto success;
+
+	/* we found it - increment its ref count and return it (cells lock and sv_lock held) */
+ found_server:
+	afs_get_server(server);
+
+ success:
+	write_unlock(&cell->sv_lock);
+	read_unlock(&afs_cells_lock);
+	/* the reference taken above is passed back to the caller through *_server */
+	*_server = server;
+	_leave(" = 0 (s=%p c=%p)",server,cell);
+	return 0;
+
+} /* end afs_server_find_by_peer() */
+
+/*****************************************************************************/
+/*
+ * purge in-memory cell database on module unload
+ * - the timeout daemon is stopped before calling this
+ */
+void afs_cell_purge(void)
+{
+	afs_vlocation_t *vlocation;
+	afs_cell_t *cell;
+
+	_enter("");
+	/* drop the reference that afs_cell_init() took on the root cell */
+	if (afs_cell_root)
+		afs_put_cell(afs_cell_root);	/* NOTE(review): afs_cell_root is not cleared, yet the record is freed below */
+
+	while (!list_empty(&afs_cells)) {
+		cell = NULL;
+
+		/* remove the next cell from the front of the list */
+		write_lock(&afs_cells_lock);
+		/* re-check under the lock: the test in the while condition was made unlocked */
+		if (!list_empty(&afs_cells)) {
+			cell = list_entry(afs_cells.next,afs_cell_t,link);
+			list_del_init(&cell->link);
+		}
+
+		write_unlock(&afs_cells_lock);
+
+		if (cell) {
+			_debug("PURGING CELL %s (%d)",cell->name,atomic_read(&cell->usage));
+			/* both active lists must already be empty; only the graveyards are drained */
+			if (!list_empty(&cell->sv_list)) BUG();
+			if (!list_empty(&cell->vl_list)) BUG();
+
+			/* purge the cell's VL graveyard list */
+			_debug(" - clearing VL graveyard");
+
+			spin_lock(&cell->vl_gylock);
+
+			while (!list_empty(&cell->vl_graveyard)) {
+				vlocation = list_entry(cell->vl_graveyard.next,
+						       afs_vlocation_t,link);
+				list_del_init(&vlocation->link);
+				/* cancel the timer, then run its handler by hand with the lock dropped */
+				afs_kafstimod_del_timer(&vlocation->timeout);
+
+				spin_unlock(&cell->vl_gylock);
+
+				afs_vlocation_do_timeout(vlocation);
+				/* TODO: race if move to use krxtimod instead of kafstimod */
+
+				spin_lock(&cell->vl_gylock);
+			}
+
+			spin_unlock(&cell->vl_gylock);
+
+			/* purge the cell's server graveyard list */
+			_debug(" - clearing server graveyard");
+
+			spin_lock(&cell->sv_gylock);
+
+			while (!list_empty(&cell->sv_graveyard)) {
+				afs_server_t *server;
+
+				server = list_entry(cell->sv_graveyard.next,afs_server_t,link);
+				list_del_init(&server->link);
+				/* same pattern as for the VL graveyard above */
+				afs_kafstimod_del_timer(&server->timeout);
+
+				spin_unlock(&cell->sv_gylock);
+
+				afs_server_do_timeout(server);
+
+				spin_lock(&cell->sv_gylock);
+			}
+
+			spin_unlock(&cell->sv_gylock);
+
+			/* now the cell should be left with no references */
+			afs_cell_destroy(cell);
+		}
+	}
+
+	_leave("");
+} /* end afs_cell_purge() */
diff --git a/fs/afs/cell.h b/fs/afs/cell.h
new file mode 100644
index 000000000000..48eb9fa91f19
--- /dev/null
+++ b/fs/afs/cell.h
@@ -0,0 +1,63 @@
+/* cell.h: AFS cell record
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_CELL_H
+#define _LINUX_AFS_CELL_H
+
+#include "types.h"
+
+extern volatile int afs_cells_being_purged; /* T when cells are being purged by rmmod */
+
+/*****************************************************************************/
+/*
+ * AFS cell record
+ */
+struct afs_cell
+{
+	atomic_t		usage;		/* reference count (afs_get_cell() / afs_put_cell()) */
+	struct list_head	link;		/* main cell list link */
+	struct list_head	proc_link;	/* /proc cell list link */
+	struct proc_dir_entry	*proc_dir;	/* /proc dir for this cell */
+	struct list_head	caches;		/* list of caches currently backing this cell */
+
+	/* server record management */
+	rwlock_t		sv_lock;	/* active server list lock */
+	struct list_head	sv_list;	/* active server list */
+	struct list_head	sv_graveyard;	/* inactive server list */
+	spinlock_t		sv_gylock;	/* inactive server list lock */
+
+	/* volume location record management */
+	struct rw_semaphore	vl_sem;		/* volume management serialisation semaphore */
+	struct list_head	vl_list;	/* cell's active VL record list */
+	struct list_head	vl_graveyard;	/* cell's inactive VL record list */
+	spinlock_t		vl_gylock;	/* graveyard lock */
+	unsigned short		vl_naddrs;	/* number of VL servers in addr list */
+	unsigned short		vl_curr_svix;	/* current server index */
+	struct in_addr		vl_addrs[16];	/* cell VL server addresses */
+
+	char			name[0];	/* cell name - must go last (storage is allocated past the struct) */
+};
+
+extern int afs_cell_init(void);
+
+extern int afs_cell_create(const char *name, char *vllist, afs_cell_t **_cell);
+
+extern int afs_cell_lookup(const char *name, afs_cell_t **_cell);
+
+#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
+
+extern afs_cell_t *afs_get_cell_maybe(afs_cell_t **_cell);
+
+extern void afs_put_cell(afs_cell_t *cell);
+
+extern void afs_cell_purge(void);
+
+#endif /* _LINUX_AFS_CELL_H */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
new file mode 100644
index 000000000000..b95c3625257a
--- /dev/null
+++ b/fs/afs/cmservice.c
@@ -0,0 +1,639 @@
+/* cmservice.c: AFS Cache Manager Service
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include "server.h"
+#include "cell.h"
+#include "transport.h"
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include "cmservice.h"
+#include "internal.h"
+
+static unsigned afscm_usage;		/* AFS cache manager usage count */
+static DECLARE_RWSEM(afscm_sem);	/* AFS cache manager start/stop semaphore (must be statically initialised: nothing here calls init_rwsem()) */
+
+static int afscm_new_call(struct rxrpc_call *call);
+static void afscm_attention(struct rxrpc_call *call);
+static void afscm_error(struct rxrpc_call *call);
+static void afscm_aemap(struct rxrpc_call *call);
+
+static void _SRXAFSCM_CallBack(struct rxrpc_call *call);
+static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call);
+static void _SRXAFSCM_Probe(struct rxrpc_call *call);
+
+typedef void (*_SRXAFSCM_xxxx_t)(struct rxrpc_call *call);
+
+static const struct rxrpc_operation AFSCM_ops[] = {	/* operations served, each bound to its handler through .user */
+	{
+		.id	= 204,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "CallBack",
+		.user	= _SRXAFSCM_CallBack,
+	},
+	{
+		.id	= 205,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "InitCallBackState",
+		.user	= _SRXAFSCM_InitCallBackState,
+	},
+	{
+		.id	= 206,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "Probe",
+		.user	= _SRXAFSCM_Probe,
+	},
+#if 0 /* disabled: no handlers for the operations below are declared in this file */
+	{
+		.id	= 207,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "GetLock",
+		.user	= _SRXAFSCM_GetLock,
+	},
+	{
+		.id	= 208,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "GetCE",
+		.user	= _SRXAFSCM_GetCE,
+	},
+	{
+		.id	= 209,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "GetXStatsVersion",
+		.user	= _SRXAFSCM_GetXStatsVersion,
+	},
+	{
+		.id	= 210,
+		.asize	= RXRPC_APP_MARK_EOF,
+		.name	= "GetXStats",
+		.user	= _SRXAFSCM_GetXStats,
+	}
+#endif
+};
+
+static struct rxrpc_service AFSCM_service = {	/* registered by afscm_start(), removed by afscm_stop() */
+	.name		= "AFS/CM",
+	.owner		= THIS_MODULE,
+	.link		= LIST_HEAD_INIT(AFSCM_service.link),
+	.new_call	= afscm_new_call,
+	.service_id	= 1,
+	.attn_func	= afscm_attention,
+	.error_func	= afscm_error,
+	.aemap_func	= afscm_aemap,
+	.ops_begin	= &AFSCM_ops[0],
+	.ops_end	= &AFSCM_ops[sizeof(AFSCM_ops)/sizeof(AFSCM_ops[0])],	/* one past the last entry */
+};
+
+static DECLARE_COMPLETION(kafscmd_alive);	/* completed by kafscmd once it has started */
+static DECLARE_COMPLETION(kafscmd_dead);	/* completed by kafscmd as it exits */
+static DECLARE_WAIT_QUEUE_HEAD(kafscmd_sleepq);	/* where kafscmd sleeps when it has nothing to do */
+static LIST_HEAD(kafscmd_attention_list);	/* calls queued for kafscmd (linked through app_attn_link) */
+static LIST_HEAD(afscm_calls);			/* calls accepted by afscm_new_call() (linked through app_link) */
+static spinlock_t afscm_calls_lock = SPIN_LOCK_UNLOCKED;	/* guards afscm_calls */
+static spinlock_t kafscmd_attention_lock = SPIN_LOCK_UNLOCKED;	/* guards kafscmd_attention_list */
+static int kafscmd_die;				/* set non-zero to ask kafscmd to exit */
+
+/*****************************************************************************/
+/*
+ * AFS Cache Manager kernel thread
+ */
+static int kafscmd(void *arg)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_call *call;
+	_SRXAFSCM_xxxx_t func;
+	int die;
+
+	printk("kAFS: Started kafscmd %d\n",current->pid);
+	strcpy(current->comm,"kafscmd");
+
+	daemonize();
+
+	complete(&kafscmd_alive);	/* afscm_start() waits on this before registering the service */
+
+	/* only certain signals are of interest */
+	spin_lock_irq(&current->sig->siglock);
+	siginitsetinv(&current->blocked,0);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,3)
+	recalc_sigpending();
+#else
+	recalc_sigpending(current);
+#endif
+	spin_unlock_irq(&current->sig->siglock);
+
+	/* loop around looking for things to attend to */
+	do {
+		if (list_empty(&kafscmd_attention_list)) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			add_wait_queue(&kafscmd_sleepq,&myself);
+
+			for (;;) {
+				set_current_state(TASK_INTERRUPTIBLE);	/* state is set before the conditions are tested */
+				if (!list_empty(&kafscmd_attention_list) ||
+				    signal_pending(current) ||
+				    kafscmd_die)
+					break;
+
+				schedule();
+			}
+
+			remove_wait_queue(&kafscmd_sleepq,&myself);
+			set_current_state(TASK_RUNNING);
+		}
+
+		die = kafscmd_die;	/* sample the exit request before looking for work */
+
+		/* dequeue the next call requiring attention */
+		call = NULL;
+		spin_lock(&kafscmd_attention_lock);
+
+		if (!list_empty(&kafscmd_attention_list)) {
+			call = list_entry(kafscmd_attention_list.next,
+					  struct rxrpc_call,
+					  app_attn_link);
+			list_del_init(&call->app_attn_link);
+			die = 0;	/* there was work queued, so go round again before exiting */
+		}
+
+		spin_unlock(&kafscmd_attention_lock);
+
+		if (call) {
+			/* act upon it */
+			_debug("@@@ Begin Attend Call %p",call);
+
+			func = call->app_user;	/* handlers set this to NULL once a call is finished with */
+			if (func)
+				func(call);
+
+			rxrpc_put_call(call);	/* drop the ref taken when the call was queued for attention */
+
+			_debug("@@@ End Attend Call %p",call);
+		}
+
+	} while(!die);
+
+	/* and that's all */
+	complete_and_exit(&kafscmd_dead,0);
+
+} /* end kafscmd() */
+
+/*****************************************************************************/
+/*
+ * handle a call coming in to the cache manager
+ * - if I want to keep the call, I must increment its usage count
+ * - the return value will be negated and passed back in an abort packet if non-zero
+ * - serialised by virtue of there only being one krxiod
+ */
+static int afscm_new_call(struct rxrpc_call *call)
+{
+	_enter("%p{cid=%u u=%d}",call,ntohl(call->call_id),atomic_read(&call->usage));
+
+	rxrpc_get_call(call);	/* this ref belongs to the afscm_calls list; taken before the call is listed */
+
+	/* add to my current call list */
+	spin_lock(&afscm_calls_lock);
+	list_add(&call->app_link,&afscm_calls);
+	spin_unlock(&afscm_calls_lock);
+
+	_leave(" = 0");
+	return 0;	/* every call is accepted */
+
+} /* end afscm_new_call() */
+
+/*****************************************************************************/
+/*
+ * queue on the kafscmd queue for attention
+ */
+static void afscm_attention(struct rxrpc_call *call)
+{
+	_enter("%p{cid=%u u=%d}",call,ntohl(call->call_id),atomic_read(&call->usage));
+
+	spin_lock(&kafscmd_attention_lock);
+
+	if (list_empty(&call->app_attn_link)) {	/* queue it only if it isn't queued already */
+		list_add_tail(&call->app_attn_link,&kafscmd_attention_list);
+		rxrpc_get_call(call);	/* ref for the attention list; kafscmd puts it after attending */
+	}
+
+	spin_unlock(&kafscmd_attention_lock);
+
+	wake_up(&kafscmd_sleepq);	/* kick kafscmd in case it is asleep */
+
+	_leave(" {u=%d}",atomic_read(&call->usage));
+} /* end afscm_attention() */
+
+/*****************************************************************************/
+/*
+ * handle my call being aborted
+ * - clean up, dequeue and put my ref to the call
+ */
+static void afscm_error(struct rxrpc_call *call)
+{
+	int removed;
+
+	_enter("%p{est=%s ac=%u er=%d}",
+	       call,
+	       rxrpc_call_error_states[call->app_err_state],
+	       call->app_abort_code,
+	       call->app_errno);
+
+	spin_lock(&kafscmd_attention_lock);
+
+	if (list_empty(&call->app_attn_link)) {	/* queue for kafscmd unless already queued */
+		list_add_tail(&call->app_attn_link,&kafscmd_attention_list);
+		rxrpc_get_call(call);	/* ref for the attention list */
+	}
+
+	spin_unlock(&kafscmd_attention_lock);
+
+	removed = 0;
+	spin_lock(&afscm_calls_lock);
+	if (!list_empty(&call->app_link)) {	/* still on the active list, so take it off */
+		list_del_init(&call->app_link);
+		removed = 1;
+	}
+	spin_unlock(&afscm_calls_lock);
+
+	if (removed)
+		rxrpc_put_call(call);	/* drop the active list's ref, but only if we did the dequeue */
+
+	wake_up(&kafscmd_sleepq);
+
+	_leave("");
+} /* end afscm_error() */
+
+/*****************************************************************************/
+/*
+ * map afs abort codes to/from Linux error codes
+ * - called with call->lock held
+ */
+static void afscm_aemap(struct rxrpc_call *call)
+{
+	/* an abort generated locally carries the negated errno as its abort code */
+	if (call->app_err_state == RXRPC_ESTATE_LOCAL_ABORT) {
+		call->app_abort_code = -call->app_errno;
+		return;
+	}
+
+	/* an abort sent by the peer is always reported as a connection abort */
+	if (call->app_err_state == RXRPC_ESTATE_PEER_ABORT)
+		call->app_errno = -ECONNABORTED;
+
+} /* end afscm_aemap() */
+
+/*****************************************************************************/
+/*
+ * start the cache manager service if not already started
+ */
+int afscm_start(void)
+{
+	int ret;
+
+	down_write(&afscm_sem);
+	if (!afscm_usage) {
+		/* clear any exit request left behind by an earlier afscm_stop() or failed
+		 * start, otherwise the new thread would exit as soon as it looked at it */
+		kafscmd_die = 0;
+		ret = kernel_thread(kafscmd,NULL,0);
+		if (ret<0)
+			goto out;
+		wait_for_completion(&kafscmd_alive);	/* the thread has started */
+		ret = rxrpc_add_service(afs_transport,&AFSCM_service);
+		if (ret<0)
+			goto kill;
+	}
+
+	afscm_usage++;	/* only the first user starts the thread and registers the service */
+	up_write(&afscm_sem);
+	return 0;
+
+ kill:
+	kafscmd_die = 1;
+	wake_up(&kafscmd_sleepq);
+	wait_for_completion(&kafscmd_dead);
+
+ out:
+	up_write(&afscm_sem);
+	return ret;
+
+} /* end afscm_start() */
+
+/*****************************************************************************/
+/*
+ * stop the cache manager service
+ */
+void afscm_stop(void)
+{
+	struct rxrpc_call *call;
+
+	down_write(&afscm_sem);
+
+	if (afscm_usage==0) BUG();
+	afscm_usage--;
+
+	if (afscm_usage==0) {
+		/* don't want more incoming calls */
+		rxrpc_del_service(afs_transport,&AFSCM_service);
+
+		/* abort any calls I've still got open (dequeued here, so afscm_error() won't) */
+		spin_lock(&afscm_calls_lock);
+		while (!list_empty(&afscm_calls)) {
+			call = list_entry(afscm_calls.next,struct rxrpc_call,app_link);
+			list_del_init(&call->app_link);
+			rxrpc_get_call(call);
+			spin_unlock(&afscm_calls_lock);
+
+			rxrpc_call_abort(call,-ESRCH); /* afscm_error() finds app_link empty: no put */
+			rxrpc_put_call(call); /* so drop the ref the afscm_calls list was holding */
+			rxrpc_put_call(call); /* and the temporary ref taken above */
+
+			spin_lock(&afscm_calls_lock);
+		}
+		spin_unlock(&afscm_calls_lock);
+
+		/* get rid of my daemon */
+		kafscmd_die = 1;
+		wake_up(&kafscmd_sleepq);
+		wait_for_completion(&kafscmd_dead);
+
+		/* dispose of any calls waiting for attention */
+		spin_lock(&kafscmd_attention_lock);
+		while (!list_empty(&kafscmd_attention_list)) {
+			call = list_entry(kafscmd_attention_list.next,
+					  struct rxrpc_call,
+					  app_attn_link);
+
+			list_del_init(&call->app_attn_link);
+			spin_unlock(&kafscmd_attention_lock);
+
+			rxrpc_put_call(call);	/* the ref the attention list held */
+
+			spin_lock(&kafscmd_attention_lock);
+		}
+		spin_unlock(&kafscmd_attention_lock);
+	}
+
+	up_write(&afscm_sem);
+
+} /* end afscm_stop() */
+
+/*****************************************************************************/
+/*
+ * handle the fileserver breaking a set of callbacks
+ */
+static void _SRXAFSCM_CallBack(struct rxrpc_call *call)
+{
+	afs_server_t *server;
+	size_t count, qty, tmp;
+	int ret = 0, removed;
+
+	_enter("%p{acs=%s}",call,rxrpc_call_states[call->app_call_state]);
+
+	server = afs_server_get_from_peer(call->conn->peer);
+
+	switch (call->app_call_state) {
+		/* we've received the last packet
+		 * - args: FID count, count*3 words of FIDs, callback count, then either nothing
+		 *   or count*3 words of callback info; drain them all and send the reply */
+	case RXRPC_CSTATE_SRVR_GOT_ARGS:
+		ret = -EBADMSG;
+		qty = call->app_ready_qty;
+		if (qty<8 || qty>50*(6*4)+8)
+			break;
+
+		{
+			afs_callback_t *cb, *pcb;
+			int loop;
+			u32 *fp, *bp;
+
+			fp = rxrpc_call_alloc_scratch(call,qty);
+
+			/* drag the entire argument block out to the scratch space */
+			ret = rxrpc_call_read_data(call,fp,qty,0);
+			if (ret<0)
+				break;
+
+			/* and unmarshall the parameter block (size must match one of the two forms) */
+			ret = -EBADMSG;
+			count = ntohl(*fp++);
+			if (count>AFSCBMAX ||
+			    (count*(3*4)+8 != qty && count*(6*4)+8 != qty))
+				break;
+
+			bp = fp + count*3;
+			tmp = ntohl(*bp++);	/* callback info may only follow if the long form was sent */
+			if (tmp>0 && (tmp!=count || count*(6*4)+8 != qty))
+				break;
+			if (tmp==0)
+				bp = NULL;
+
+			pcb = cb = rxrpc_call_alloc_scratch(call,count*sizeof(afs_callback_t)); /* room for all */
+
+			for (loop=count-1; loop>=0; loop--) {
+				pcb->fid.vid	= ntohl(*fp++);
+				pcb->fid.vnode	= ntohl(*fp++);
+				pcb->fid.unique	= ntohl(*fp++);
+				if (bp) {
+					pcb->version	= ntohl(*bp++);
+					pcb->expiry	= ntohl(*bp++);
+					pcb->type	= ntohl(*bp++);
+				}
+				else {
+					pcb->version	= 0;
+					pcb->expiry	= 0;
+					pcb->type	= AFSCM_CB_UNTYPED;
+				}
+				pcb++;
+			}
+
+			/* invoke the actual service routine */
+			ret = SRXAFSCM_CallBack(server,count,cb);
+			if (ret<0)
+				break;
+		}
+
+		/* send the reply */
+		ret = rxrpc_call_write_data(call,0,NULL,RXRPC_LAST_PACKET,GFP_KERNEL,0,&count);
+		if (ret<0)
+			break;
+		break;
+
+		/* operation complete */
+	case RXRPC_CSTATE_COMPLETE:
+		call->app_user = NULL;
+		removed = 0;
+		spin_lock(&afscm_calls_lock);
+		if (!list_empty(&call->app_link)) {
+			list_del_init(&call->app_link);
+			removed = 1;
+		}
+		spin_unlock(&afscm_calls_lock);
+
+		if (removed)
+			rxrpc_put_call(call);	/* drop the active list's ref if we did the dequeue */
+		break;
+
+		/* operation terminated on error */
+	case RXRPC_CSTATE_ERROR:
+		call->app_user = NULL;
+		break;
+
+	default:
+		break;
+	}
+
+	if (ret<0)
+		rxrpc_call_abort(call,ret);	/* malformed arguments or a failed step abort the call */
+
+	if (server) afs_put_server(server);
+
+	_leave(" = %d",ret);
+
+} /* end _SRXAFSCM_CallBack() */
+
+/*****************************************************************************/
+/*
+ * handle the fileserver asking us to initialise our callback state
+ */
+static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call)
+{
+	afs_server_t *server;
+	size_t sent;
+	int ret = 0, dequeued = 0;
+
+	_enter("%p{acs=%s}",call,rxrpc_call_states[call->app_call_state]);
+
+	server = afs_server_get_from_peer(call->conn->peer);
+
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_SRVR_GOT_ARGS:
+		/* argument data has arrived, but this operation doesn't take any */
+		ret = -EBADMSG;
+		break;
+
+	case RXRPC_CSTATE_SRVR_SND_REPLY:
+		/* a reply is wanted: run the service routine and, provided that works,
+		 * send an empty reply flagged as the last packet
+		 */
+		ret = SRXAFSCM_InitCallBackState(server);
+		if (ret>=0)
+			ret = rxrpc_call_write_data(call,0,NULL,RXRPC_LAST_PACKET,
+						    GFP_KERNEL,0,&sent);
+		break;
+
+	case RXRPC_CSTATE_COMPLETE:
+		/* all done: stop attending to the call and take it off the active list,
+		 * dropping the list's reference if it was still queued there
+		 */
+		call->app_user = NULL;
+		spin_lock(&afscm_calls_lock);
+		if (!list_empty(&call->app_link)) {
+			list_del_init(&call->app_link);
+			dequeued = 1;
+		}
+		spin_unlock(&afscm_calls_lock);
+
+		if (dequeued)
+			rxrpc_put_call(call);
+		break;
+
+	case RXRPC_CSTATE_ERROR:
+		/* failed: just stop attending to the call */
+		call->app_user = NULL;
+		break;
+
+	default:
+		break;
+	}
+
+	/* a failure in any state is passed on by aborting the call */
+	if (ret<0)
+		rxrpc_call_abort(call,ret);
+
+	if (server)
+		afs_put_server(server);
+
+	_leave(" = %d",ret);
+
+} /* end _SRXAFSCM_InitCallBackState() */
+
+/*****************************************************************************/
+/*
+ * handle a probe from a fileserver
+ */
+static void _SRXAFSCM_Probe(struct rxrpc_call *call)
+{
+	afs_server_t *server;
+	size_t sent;
+	int ret = 0, dequeued = 0;
+
+	_enter("%p{acs=%s}",call,rxrpc_call_states[call->app_call_state]);
+
+	server = afs_server_get_from_peer(call->conn->peer);
+
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_SRVR_GOT_ARGS:
+		/* argument data has arrived, but a probe doesn't take any */
+		ret = -EBADMSG;
+		break;
+
+	case RXRPC_CSTATE_SRVR_SND_REPLY:
+		/* a reply is wanted: run the service routine and, provided that works,
+		 * send an empty reply flagged as the last packet
+		 */
+		ret = SRXAFSCM_Probe(server);
+		if (ret>=0)
+			ret = rxrpc_call_write_data(call,0,NULL,RXRPC_LAST_PACKET,
+						    GFP_KERNEL,0,&sent);
+		break;
+
+	case RXRPC_CSTATE_COMPLETE:
+		/* all done: stop attending to the call and take it off the active list,
+		 * dropping the list's reference if it was still queued there
+		 */
+		call->app_user = NULL;
+		spin_lock(&afscm_calls_lock);
+		if (!list_empty(&call->app_link)) {
+			list_del_init(&call->app_link);
+			dequeued = 1;
+		}
+		spin_unlock(&afscm_calls_lock);
+
+		if (dequeued)
+			rxrpc_put_call(call);
+		break;
+
+	case RXRPC_CSTATE_ERROR:
+		/* failed: just stop attending to the call */
+		call->app_user = NULL;
+		break;
+
+	default:
+		break;
+	}
+
+	/* a failure in any state is passed on by aborting the call */
+	if (ret<0)
+		rxrpc_call_abort(call,ret);
+
+	if (server)
+		afs_put_server(server);
+
+	_leave(" = %d",ret);
+
+} /* end _SRXAFSCM_Probe() */
diff --git a/fs/afs/cmservice.h b/fs/afs/cmservice.h
new file mode 100644
index 000000000000..89fb14e7615b
--- /dev/null
+++ b/fs/afs/cmservice.h
@@ -0,0 +1,27 @@
+/* cmservice.h: AFS Cache Manager Service declarations
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_CMSERVICE_H
+#define _LINUX_AFS_CMSERVICE_H
+
+#include <rxrpc/transport.h>
+#include "types.h"
+
+/* cache manager start/stop */
+extern int afscm_start(void);
+extern void afscm_stop(void);
+
+/* cache manager server functions */
+extern int SRXAFSCM_InitCallBackState(afs_server_t *server);
+extern int SRXAFSCM_CallBack(afs_server_t *server, size_t count, afs_callback_t callbacks[]);
+extern int SRXAFSCM_Probe(afs_server_t *server);
+
+#endif /* _LINUX_AFS_CMSERVICE_H */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
new file mode 100644
index 000000000000..d39345f4e277
--- /dev/null
+++ b/fs/afs/dir.c
@@ -0,0 +1,642 @@
+/* dir.c: AFS filesystem directory handling
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include "vnode.h"
+#include "volume.h"
+#include <rxrpc/call.h>
+#include "super.h"
+#include "internal.h"
+
+static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry);
+static int afs_dir_open(struct inode *inode, struct file *file);
+static int afs_dir_readdir(struct file *file, void *dirent, filldir_t filldir);
+static int afs_d_revalidate(struct dentry *dentry, int flags);
+static int afs_d_delete(struct dentry *dentry);
+static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos,
+				     ino_t ino, unsigned dtype);
+
+struct file_operations afs_dir_file_operations = {	/* file operations for directories: open and readdir only */
+	.open		= afs_dir_open,
+	.readdir	= afs_dir_readdir,
+};
+
+struct inode_operations afs_dir_inode_operations = {	/* only lookup and attribute retrieval are provided */
+	.lookup		= afs_dir_lookup,
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	.getattr	= afs_inode_getattr,
+#else
+	.revalidate	= afs_inode_revalidate,
+#endif
+//	.create		= afs_dir_create,	(the operations commented out here are not implemented in this file)
+//	.link		= afs_dir_link,
+//	.unlink		= afs_dir_unlink,
+//	.symlink	= afs_dir_symlink,
+//	.mkdir		= afs_dir_mkdir,
+//	.rmdir		= afs_dir_rmdir,
+//	.mknod		= afs_dir_mknod,
+//	.rename		= afs_dir_rename,
+};
+
+static struct dentry_operations afs_fs_dentry_operations = {	/* attached to dentries by afs_dir_lookup() */
+	.d_revalidate	= afs_d_revalidate,
+	.d_delete	= afs_d_delete,
+};
+
+#define AFS_DIR_HASHTBL_SIZE	128
+#define AFS_DIR_DIRENT_SIZE	32
+#define AFS_DIRENT_PER_BLOCK	64
+
+typedef union afs_dirent {	/* one 32-byte directory slot */
+	struct {
+		u8	valid;
+		u8	unused[1];
+		u16	hash_next;
+		u32	vnode;		/* vnode number; read through ntohl() by the block iterator */
+		u32	unique;		/* FID uniquifier; converted with ntohl() by the lookup filldir */
+		u8	name[16];
+		u8	overflow[4];	/* if any char of the name (inc NUL) reaches here, consume
+					 * the next dirent too */
+	};
+	u8	extended_name[32];	/* NOTE(review): presumably a slot holding the continuation of a long name - confirm */
+} afs_dirent_t;
+
+/* AFS directory page header (one at the beginning of every 2048-byte chunk) */
+typedef struct afs_dir_pagehdr {
+	u16	npages;
+	u16	magic;		/* must equal AFS_DIR_MAGIC for the block to pass afs_dir_check_page() */
+#define AFS_DIR_MAGIC htons(1234)
+	u8	nentries;	/* the block walk starts at slot AFS_DIRENT_PER_BLOCK - nentries */
+	u8	bitmap[8];	/* one bit per dirent slot; a clear bit means the slot is unused */
+	u8	pad[19];
+} afs_dir_pagehdr_t;
+
+/* directory block layout */
+typedef union afs_dir_block {	/* 2048 bytes; the size is asserted with BUG() in afs_dir_open() and afs_dir_lookup() */
+
+	afs_dir_pagehdr_t pagehdr;	/* header at the start of every block */
+
+	struct {
+		afs_dir_pagehdr_t pagehdr;
+		u8		alloc_ctrs[128];
+		u16		hashtable[AFS_DIR_HASHTBL_SIZE]; /* dir hash table */
+	} hdr;	/* NOTE(review): presumably the larger header of the directory's first block only - confirm */
+
+	afs_dirent_t dirents[AFS_DIRENT_PER_BLOCK];	/* the same block viewed as an array of dirent slots */
+} afs_dir_block_t;
+
+/* layout on a linux VM page */
+typedef struct afs_dir_page {
+	afs_dir_block_t	blocks[PAGE_SIZE/sizeof(afs_dir_block_t)];	/* as many whole blocks as fit in one page */
+} afs_dir_page_t;
+
+struct afs_dir_lookup_cookie {	/* state passed to afs_dir_lookup_filldir() during a name search */
+	afs_fid_t	fid;	/* vid is set by the caller; vnode and unique are filled in on a match */
+	const char	*name;	/* name being searched for */
+	size_t		nlen;	/* length of that name */
+	int		found;	/* set to 1 by the filldir routine when the name matches */
+};
+
+/*****************************************************************************/
+/*
+ * check that a directory page is valid
+ */
+static inline void afs_dir_check_page(struct inode *dir, struct page *page)
+{
+	afs_dir_page_t *dbuf;
+	loff_t latter;
+	int tmp, qty;
+
+#if 0
+	/* check the page count */
+	qty = desc.size/sizeof(dbuf->blocks[0]);
+	if (qty==0)
+		goto error;
+
+	if (page->index==0 && qty!=ntohs(dbuf->blocks[0].pagehdr.npages)) {
+		printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
+		       __FUNCTION__,dir->i_ino,qty,ntohs(dbuf->blocks[0].pagehdr.npages));
+		goto error;
+	}
+#endif
+
+	/* determine how many magic numbers there should be in this page */
+	latter = dir->i_size - (page->index << PAGE_CACHE_SHIFT);	/* bytes of the dir from this page onwards */
+	if (latter >= PAGE_SIZE)
+		qty = PAGE_SIZE;
+	else
+		qty = latter;
+	qty /= sizeof(afs_dir_block_t);	/* whole blocks only: a trailing partial block isn't checked */
+
+	/* check them */
+	dbuf = page_address(page);
+	for (tmp=0; tmp<qty; tmp++) {
+		if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
+			printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
+			       __FUNCTION__,dir->i_ino,tmp,
+			       qty,ntohs(dbuf->blocks[tmp].pagehdr.magic));
+			goto error;
+		}
+	}
+
+	SetPageChecked(page);	/* good page: marked checked so the caller doesn't check it again */
+	return;
+
+ error:
+	SetPageChecked(page);	/* bad page: also marked checked, with the error flag recording the failure */
+	SetPageError(page);
+
+} /* end afs_dir_check_page() */
+
+/*****************************************************************************/
+/*
+ * discard a page cached in the pagecache
+ */
+static inline void afs_dir_put_page(struct page *page)
+{
+	kunmap(page);			/* undo the kmap() done in afs_dir_get_page() */
+	page_cache_release(page);	/* then release the page itself */
+
+} /* end afs_dir_put_page() */
+
+/*****************************************************************************/
+/*
+ * get a page into the pagecache
+ */
+static struct page *afs_dir_get_page(struct inode *dir, unsigned long index)
+{
+	struct page *page;
+
+	_enter("{%lu},%lu",dir->i_ino,index);
+
+	page = read_cache_page(dir->i_mapping,index,
+			       (filler_t*)dir->i_mapping->a_ops->readpage,NULL);
+	if (!IS_ERR(page)) {
+		wait_on_page_locked(page);
+		kmap(page);	/* mapped before the checks, so the fail path can use afs_dir_put_page() */
+		if (!PageUptodate(page))
+			goto fail;
+		if (!PageChecked(page))
+			afs_dir_check_page(dir,page);	/* validate once; the verdict is kept in the page flags */
+		if (PageError(page))
+			goto fail;
+	}
+	return page;	/* a mapped, checked page, or the ERR_PTR from read_cache_page() */
+
+ fail:
+	afs_dir_put_page(page);
+	return ERR_PTR(-EIO);
+} /* end afs_dir_get_page() */
+
+/*****************************************************************************/
+/*
+ * open an AFS directory file
+ */
+static int afs_dir_open(struct inode *inode, struct file *file)
+{
+	_enter("{%lu}",inode->i_ino);
+
+	/* sanity check the directory layout structures against the sizes required */
+	if (sizeof(afs_dir_block_t) != 2048 || sizeof(afs_dirent_t) != 32)
+		BUG();
+	/* a directory flagged as deleted cannot be opened */
+	if (!(AFS_FS_I(inode)->flags & AFS_VNODE_DELETED)) {
+		_leave(" = 0");
+		return 0;
+	}
+	return -ENOENT;
+} /* end afs_dir_open() */
+
+/*****************************************************************************/
+/*
+ * deal with one block in an AFS directory
+ */
+static int afs_dir_iterate_block(unsigned *fpos,
+				    afs_dir_block_t *block,
+				    unsigned blkoff,
+				    void *cookie,
+				    filldir_t filldir)
+{
+	afs_dirent_t *dire;
+	unsigned offset, next, curr;
+	size_t nlen;
+	int tmp, ret;
+
+	_enter("%u,%x,%p,,",*fpos,blkoff,block);
+
+	curr = (*fpos - blkoff) / sizeof(afs_dirent_t);	/* slot index at which the caller wants to resume */
+
+	/* walk through the block, an entry at a time */
+	for (offset = AFS_DIRENT_PER_BLOCK - block->pagehdr.nentries;
+	     offset < AFS_DIRENT_PER_BLOCK;
+	     offset = next
+	     ) {
+		next = offset + 1;
+
+		/* skip entries marked unused in the bitmap */
+		if (!(block->pagehdr.bitmap[offset/8] & (1 << (offset % 8)))) {
+			_debug("ENT[%u.%u]: unused\n",blkoff/sizeof(afs_dir_block_t),offset);
+			if (offset>=curr)
+				*fpos = blkoff + next * sizeof(afs_dirent_t);
+			continue;
+		}
+
+		/* got a valid entry - the name scan is bounded by the bytes from name[] to block end */
+		dire = &block->dirents[offset];
+		nlen = strnlen(dire->name,(u8*)(block+1) - dire->name);
+
+		_debug("ENT[%u.%u]: %s %u \"%.*s\"\n",
+		       blkoff/sizeof(afs_dir_block_t),offset,
+		       offset<curr ? "skip" : "fill",
+		       nlen,nlen,dire->name);
+
+		/* work out where the next possible entry is */
+		for (tmp=nlen; tmp>15; tmp-=sizeof(afs_dirent_t)) {	/* one more slot per 32 bytes of name */
+			if (next>=AFS_DIRENT_PER_BLOCK) {
+				_debug("ENT[%u.%u]:"
+				       " %u travelled beyond end dir block (len %u/%u)\n",
+				       blkoff/sizeof(afs_dir_block_t),offset,next,tmp,nlen);
+				return -EIO;
+			}
+			if (!(block->pagehdr.bitmap[next/8] & (1 << (next % 8)))) {
+				_debug("ENT[%u.%u]: %u unmarked extension (len %u/%u)\n",
+				       blkoff/sizeof(afs_dir_block_t),offset,next,tmp,nlen);
+				return -EIO;
+			}
+
+			_debug("ENT[%u.%u]: ext %u/%u\n",
+			       blkoff/sizeof(afs_dir_block_t),next,tmp,nlen);
+			next++;
+		}
+
+		/* skip if starts before the current position */
+		if (offset<curr)
+			continue;
+
+		/* found the next entry */
+		ret = filldir(cookie,
+			      dire->name,
+			      nlen,
+			      blkoff + offset * sizeof(afs_dirent_t),
+			      ntohl(dire->vnode),
+			      filldir==afs_dir_lookup_filldir ? dire->unique : DT_UNKNOWN);
+		if (ret<0) {	/* the callback wants no more; *fpos is left pointing at this entry */
+			_leave(" = 0 [full]");
+			return 0;
+		}
+
+		*fpos = blkoff + next * sizeof(afs_dirent_t);
+	}
+
+	_leave(" = 1 [more]");
+	return 1;
+} /* end afs_dir_iterate_block() */
+
+/*****************************************************************************/
+/*
+ * read an AFS directory
+ */
+static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, filldir_t filldir)
+{
+	afs_dir_block_t	*dblock;
+	afs_dir_page_t *dbuf;
+	struct page *page;
+	unsigned blkoff, limit;
+	int ret;
+
+	_enter("{%lu},%u,,",dir->i_ino,*fpos);
+
+	if (AFS_FS_I(dir)->flags & AFS_VNODE_DELETED) {
+		_leave(" = -ESTALE");
+		return -ESTALE;
+	}
+
+	/* round the file position up to the next entry boundary */
+	*fpos += sizeof(afs_dirent_t) - 1;
+	*fpos &= ~(sizeof(afs_dirent_t) - 1);
+
+	/* walk through the blocks in sequence */
+	ret = 0;
+	while (*fpos < dir->i_size) {
+		blkoff = *fpos & ~(sizeof(afs_dir_block_t) - 1);
+
+		/* fetch the appropriate page from the directory */
+		page = afs_dir_get_page(dir,blkoff/PAGE_SIZE);
+		if (IS_ERR(page)) {
+			ret = PTR_ERR(page);
+			break;
+		}
+
+		limit = (blkoff & ~(PAGE_SIZE-1)) + PAGE_SIZE;	/* first offset beyond this page */
+
+		dbuf = page_address(page);
+
+		/* deal with the individual blocks stashed on this page */
+		do {
+			dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / sizeof(afs_dir_block_t)];
+			ret = afs_dir_iterate_block(fpos,dblock,blkoff,cookie,filldir);
+			if (ret!=1) {	/* error (<0) or callback satisfied (0): stop here */
+				afs_dir_put_page(page);
+				goto out;
+			}
+
+			blkoff += sizeof(afs_dir_block_t);
+			if (*fpos < blkoff) *fpos = blkoff; /* block finished: never revisit it (else we'd spin) */
+		} while (*fpos < dir->i_size && blkoff < limit);
+
+		afs_dir_put_page(page);
+		ret = 0;
+	}
+
+ out:
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_dir_iterate() */
+
+/*****************************************************************************/
+/*
+ * read an AFS directory
+ */
+static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir)
+{
+	struct inode *dir = file->f_dentry->d_inode;
+	unsigned pos = file->f_pos;	/* the iterator works on an unsigned position */
+	int result;
+
+	_enter("{%Ld,{%lu}}",file->f_pos,dir->i_ino);
+
+	result = afs_dir_iterate(dir,&pos,cookie,filldir);
+	file->f_pos = pos;		/* record how far the iteration got */
+
+	_leave(" = %d",result);
+	return result;
+} /* end afs_dir_readdir() */
+
+/*****************************************************************************/
+/*
+ * search the directory for a name
+ * - if afs_dir_iterate_block() spots this function, it'll pass the FID uniquifier through dtype
+ */
+static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos,
+				  ino_t ino, unsigned dtype)
+{
+	struct afs_dir_lookup_cookie *want = _cookie;
+
+	_enter("{%s,%u},%s,%u,,%lu,%u",want->name,want->nlen,name,nlen,ino,ntohl(dtype));
+
+	/* a match needs the same length and the same bytes */
+	if (want->nlen == nlen && memcmp(want->name,name,nlen)==0) {
+		/* record the FID; the uniquifier is passed in through dtype */
+		want->fid.vnode = ino;
+		want->fid.unique = ntohl(dtype);
+		want->found = 1;
+		_leave(" = -1 [found]");
+		return -1;
+	}
+	_leave(" = 0 [no]");
+	return 0;
+} /* end afs_dir_lookup_filldir() */
+
+/*****************************************************************************/
+/*
+ * look up an entry in a directory
+ */
+static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry)
+{
+	struct afs_dir_lookup_cookie cookie;
+	struct afs_super_info *as;
+	struct inode *inode;
+	afs_vnode_t *dvnode = AFS_FS_I(dir);
+	unsigned fpos = 0;
+	int ret;
+
+	_enter("{%lu},{%s}",dir->i_ino,dentry->d_name.name);
+
+	/* the directory layout structures must have the sizes required */
+	if (sizeof(afs_dir_block_t) != 2048 || sizeof(afs_dirent_t) != 32)
+		BUG();
+
+	/* reject names that are too long */
+	if (dentry->d_name.len > 255) {
+		_leave(" = -ENAMETOOLONG");
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	/* a directory flagged as deleted can't be searched */
+	if (dvnode->flags & AFS_VNODE_DELETED) {
+		_leave(" = -ESTALE");
+		return ERR_PTR(-ESTALE);
+	}
+
+	/* scan the directory for the name; on a match the filldir routine fills in the
+	 * vnode and uniquifier of the FID, whilst the volume ID comes from the superblock
+	 */
+	as = dir->i_sb->s_fs_info;
+	cookie.found	= 0;
+	cookie.name	= dentry->d_name.name;
+	cookie.nlen	= dentry->d_name.len;
+	cookie.fid.vid	= as->volume->vid;
+
+	ret = afs_dir_iterate(dir,&fpos,&cookie,afs_dir_lookup_filldir);
+	if (ret<0)
+		goto error;
+
+	if (!cookie.found) {
+		ret = -ENOENT;
+		goto error;
+	}
+
+	/* get the inode for the FID found and attach it to the dentry */
+	ret = afs_iget(dir->i_sb,&cookie.fid,&inode);
+	if (ret<0)
+		goto error;
+
+	/* note the directory version that the dentry was looked up against */
+	dentry->d_fsdata = (void*) (unsigned) dvnode->status.version;
+	dentry->d_op = &afs_fs_dentry_operations;
+	d_add(dentry,inode);
+
+	_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }",
+	       cookie.fid.vnode,
+	       cookie.fid.unique,
+	       dentry->d_inode->i_ino,
+	       dentry->d_inode->i_version);
+	return NULL;
+
+ error:
+	_leave(" = %d",ret);
+	return ERR_PTR(ret);
+} /* end afs_dir_lookup() */
+
+/*****************************************************************************/
+/*
+ * check that a dentry lookup hit has found a valid entry
+ * - NOTE! the hit can be a negative hit too, so we can't assume we have an inode
+ * (derived from nfs_lookup_revalidate)
+ */
+static int afs_d_revalidate(struct dentry *dentry, int flags)
+{
+	struct afs_dir_lookup_cookie cookie;
+	struct dentry *parent;
+	struct inode *inode, *dir;
+	unsigned fpos;
+	int ret;
+
+	_enter("%s,%x",dentry->d_name.name,flags);
+
+	/* lock down the parent dentry so we can peer at it */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	read_lock(&dparent_lock);
+	parent = dget(dentry->d_parent);
+	read_unlock(&dparent_lock);
+#else
+	lock_kernel();
+	parent = dget(dentry->d_parent);
+	unlock_kernel();
+#endif
+
+	dir = parent->d_inode;
+	inode = dentry->d_inode;
+
+	/* handle a negative inode */
+	if (!inode)
+		goto out_bad;
+
+	/* handle a bad inode */
+	if (is_bad_inode(inode)) {
+		printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
+		       dentry->d_parent->d_name.name,dentry->d_name.name);
+		goto out_bad;
+	}
+
+	/* force a full look up if the parent directory changed since last the server was consulted
+	 * - otherwise this inode must still exist, even if the inode details themselves have
+	 *   changed
+	 */
+	if (AFS_FS_I(dir)->flags & AFS_VNODE_CHANGED)
+		afs_vnode_fetch_status(AFS_FS_I(dir));	/* the return value is not checked here */
+
+	if (AFS_FS_I(dir)->flags & AFS_VNODE_DELETED) {
+		_debug("%s: parent dir deleted",dentry->d_name.name);
+		goto out_bad;
+	}
+
+	if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED) {
+		_debug("%s: file already deleted",dentry->d_name.name);
+		goto out_bad;
+	}
+
+	if ((unsigned)dentry->d_fsdata != (unsigned)AFS_FS_I(dir)->status.version) {	/* d_fsdata holds the dir version last validated against */
+		_debug("%s: parent changed %u -> %u",
+		       dentry->d_name.name,
+		       (unsigned)dentry->d_fsdata,
+		       (unsigned)AFS_FS_I(dir)->status.version);
+
+		/* search the directory for this vnode */
+		cookie.name	= dentry->d_name.name;
+		cookie.nlen	= dentry->d_name.len;
+		cookie.fid.vid	= AFS_FS_I(inode)->volume->vid;
+		cookie.found	= 0;
+
+		fpos = 0;
+		ret = afs_dir_iterate(dir,&fpos,&cookie,afs_dir_lookup_filldir);
+		if (ret<0) {
+			_debug("failed to iterate dir %s: %d",parent->d_name.name,ret);
+			goto out_bad;
+		}
+
+		if (!cookie.found) {
+			_debug("%s: dirent not found",dentry->d_name.name);
+			goto not_found;
+		}
+
+		/* if the vnode ID has changed, then the dirent points to a different file */
+		if (cookie.fid.vnode!=AFS_FS_I(inode)->fid.vnode) {
+			_debug("%s: dirent changed",dentry->d_name.name);
+			goto not_found;
+		}
+
+		/* if the vnode ID uniqifier has changed, then the file has been deleted */
+		if (cookie.fid.unique!=AFS_FS_I(inode)->fid.unique) {
+			_debug("%s: file deleted (uq %u -> %u I:%lu)",
+			       dentry->d_name.name,
+			       cookie.fid.unique,
+			       AFS_FS_I(inode)->fid.unique,
+			       inode->i_version);
+			spin_lock(&AFS_FS_I(inode)->lock);
+			AFS_FS_I(inode)->flags |= AFS_VNODE_DELETED;
+			spin_unlock(&AFS_FS_I(inode)->lock);
+			invalidate_inode_pages(inode->i_mapping);
+			goto out_bad;
+		}
+
+		dentry->d_fsdata = (void*) (unsigned) AFS_FS_I(dir)->status.version;	/* now validated against this version */
+	}
+
+ out_valid:
+	dput(parent);
+	_leave(" = 1 [valid]");
+	return 1;
+
+	/* the dirent, if it exists, now points to a different vnode */
+ not_found:
+	dentry->d_flags |= DCACHE_NFSFS_RENAMED;	/* afs_d_delete() asks for flagged dentries to be unhashed */
+
+ out_bad:
+	if (inode) {
+		/* don't unhash if we have submounts */
+		if (have_submounts(dentry))
+			goto out_valid;
+	}
+
+	shrink_dcache_parent(dentry);
+
+	_debug("dropping dentry %s/%s",dentry->d_parent->d_name.name,dentry->d_name.name);
+	d_drop(dentry);
+
+	dput(parent);	/* release the ref taken on the parent at the top */
+
+	_leave(" = 0 [bad]");
+	return 0;
+} /* end afs_d_revalidate() */
+
+/*****************************************************************************/
+/*
+ * allow the VFS to enquire as to whether a dentry should be unhashed (mustn't sleep)
+ * - called from dput() when d_count is going to 0.
+ * - return 1 to request dentry be unhashed, 0 otherwise
+ */
+static int afs_d_delete(struct dentry *dentry)
+{
+	int zap;
+
+	_enter("%s",dentry->d_name.name);
+
+	/* unhash if revalidation flagged the dentry, or if its vnode is marked deleted */
+	zap = (dentry->d_flags & DCACHE_NFSFS_RENAMED) != 0;
+	if (!zap && dentry->d_inode)
+		zap = (AFS_FS_I(dentry->d_inode)->flags & AFS_VNODE_DELETED) != 0;
+
+	if (zap) {
+		_leave(" = 1 [zap]");
+		return 1;
+	}
+
+	_leave(" = 0 [keep]");
+	return 0;
+} /* end afs_d_delete() */
diff --git a/fs/afs/errors.h b/fs/afs/errors.h
new file mode 100644
index 000000000000..115befe16450
--- /dev/null
+++ b/fs/afs/errors.h
@@ -0,0 +1,34 @@
+/* errors.h: AFS abort/error codes
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _H_DB712916_5113_11D6_9A6D_0002B3163499
+#define _H_DB712916_5113_11D6_9A6D_0002B3163499
+
+#include "types.h"
+
+/* file server abort codes (values 101-111); see afs_abort_to_error() for the mapping to Linux error codes */
+typedef enum {
+	VSALVAGE	= 101,	/* volume needs salvaging */
+	VNOVNODE	= 102,	/* no such file/dir (vnode) */
+	VNOVOL		= 103,	/* no such volume or volume unavailable */
+	VVOLEXISTS	= 104,	/* volume name already exists */
+	VNOSERVICE	= 105,	/* volume not currently in service */
+	VOFFLINE	= 106,	/* volume is currently offline (more info available [VVL-spec]) */
+	VONLINE		= 107,	/* volume is already online */
+	VDISKFULL	= 108,	/* disk partition is full */
+	VOVERQUOTA	= 109,	/* volume's maximum quota exceeded */
+	VBUSY		= 110,	/* volume is temporarily unavailable */
+	VMOVED		= 111,	/* volume moved to new server - ask this FS where */
+} afs_rxfs_abort_t;
+
+extern int afs_abort_to_error(int abortcode);
+
+#endif /* _H_DB712916_5113_11D6_9A6D_0002B3163499 */
diff --git a/fs/afs/file.c b/fs/afs/file.c
new file mode 100644
index 000000000000..d14e427b5784
--- /dev/null
+++ b/fs/afs/file.c
@@ -0,0 +1,143 @@
+/* file.c: AFS filesystem file handling
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "vnode.h"
+#include <rxrpc/call.h>
+#include "internal.h"
+
+//static int afs_file_open(struct inode *inode, struct file *file);
+//static int afs_file_release(struct inode *inode, struct file *file);
+
+static int afs_file_readpage(struct file *file, struct page *page);
+
+//static ssize_t afs_file_read(struct file *file, char *buf, size_t size, loff_t *off);
+
+static ssize_t afs_file_write(struct file *file, const char *buf, size_t size, loff_t *off);
+
+struct inode_operations afs_file_inode_operations = {	/* set as i_op for AFS_FTYPE_FILE vnodes by afs_inode_map_status() */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	.getattr	= afs_inode_getattr,	/* used when building for kernels newer than 2.5.0 */
+#else
+	.revalidate	= afs_inode_revalidate,	/* used when building for 2.5.0 or older */
+#endif
+};
+
+struct file_operations afs_file_file_operations = {	/* set as i_fop for AFS_FTYPE_FILE vnodes by afs_inode_map_status() */
+//	.open		= afs_file_open,
+//	.release	= afs_file_release,
+	.read		= generic_file_read, //afs_file_read,
+	.write		= afs_file_write,	/* always fails: returns -ESTALE or -EIO */
+	.mmap		= generic_file_mmap,
+//	.fsync		= afs_file_fsync,
+};
+
+struct address_space_operations afs_fs_aops = {	/* only the readpage operation is provided */
+	.readpage	= afs_file_readpage,
+};
+
+/*****************************************************************************/
+/*
+ * AFS file read
+ */
+#if 0 /* compiled out: afs_file_file_operations uses generic_file_read for .read instead */
+static ssize_t afs_file_read(struct file *file, char *buf, size_t size, loff_t *off)
+{
+	struct afs_inode_info *ai; /* NOTE(review): the live code uses afs_vnode_t here - presumably a stale name; confirm before re-enabling */
+
+	ai = AFS_FS_I(file->f_dentry->d_inode);
+	if (ai->flags & AFS_INODE_DELETED) /* NOTE(review): the live code tests AFS_VNODE_DELETED */
+		return -ESTALE;
+
+	return -EIO;
+} /* end afs_file_read() */
+#endif
+
+/*****************************************************************************/
+/*
+ * AFS file write
+ */
+static ssize_t afs_file_write(struct file *file, const char *buf, size_t size, loff_t *off)
+{
+	/* nothing is ever written: a deleted vnode yields -ESTALE, anything else -EIO */
+	afs_vnode_t *v = AFS_FS_I(file->f_dentry->d_inode);
+	ssize_t ret;
+
+	ret = (v->flags & AFS_VNODE_DELETED) ? -ESTALE : -EIO;
+
+	return ret;
+} /* end afs_file_write() */
+
+/*****************************************************************************/
+/*
+ * AFS read page from file (or symlink)
+ */
+static int afs_file_readpage(struct file *file, struct page *page)
+{
+	struct afs_rxfs_fetch_descriptor desc;
+	struct inode *inode;
+	afs_vnode_t *vnode;
+	int ret;
+
+	inode = page->mapping->host;
+	vnode = AFS_FS_I(inode);
+
+	_enter("{%lu},{%lu}",inode->i_ino,page->index);
+
+	/* the page must arrive locked; it is unlocked again on both exit paths below */
+	if (!PageLocked(page))
+		PAGE_BUG(page);
+
+	ret = -ESTALE;	/* don't ask the server about a vnode already flagged as deleted */
+	if (vnode->flags & AFS_VNODE_DELETED)
+		goto error;
+
+	/* work out how much to get and from where (at most one page, clipped to i_size) */
+	desc.fid	= vnode->fid;
+	desc.offset	= page->index << PAGE_CACHE_SHIFT;
+	desc.size	= min((size_t)(inode->i_size - desc.offset),(size_t)PAGE_SIZE);
+	desc.buffer	= kmap(page);
+
+	/* zero the whole page up front; the fetch below asks for at most desc.size bytes */
+	clear_page(desc.buffer);
+
+	/* read the contents of the file from the server into the page */
+	ret = afs_vnode_fetch_data(vnode,&desc);
+	kunmap(page);
+	if (ret<0) {
+		if (ret==-ENOENT) {
+			_debug("got NOENT from server - marking file deleted and stale");
+			spin_lock(&vnode->lock);	/* same locking as afs_d_revalidate() uses for this flag */
+			vnode->flags |= AFS_VNODE_DELETED;
+			spin_unlock(&vnode->lock);
+			ret = -ESTALE;
+		}
+		goto error;
+	}
+
+	SetPageUptodate(page);
+	unlock_page(page);
+	_leave(" = 0");
+	return 0;
+
+ error:
+	SetPageError(page);
+	unlock_page(page);
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_file_readpage() */
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
new file mode 100644
index 000000000000..e4aabcb85d7d
--- /dev/null
+++ b/fs/afs/fsclient.c
@@ -0,0 +1,816 @@
+/* fsclient.c: AFS File Server client stubs
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include "fsclient.h"
+#include "cmservice.h"
+#include "vnode.h"
+#include "server.h"
+#include "errors.h"
+#include "internal.h"
+
+#define FSFETCHSTATUS		132	/* AFS Fetch file status */
+#define FSFETCHDATA		130	/* AFS Fetch file data */
+#define FSGIVEUPCALLBACKS	147	/* AFS Discard server callback promises */
+#define FSGETVOLUMEINFO		148	/* AFS Get root volume information */
+#define FSGETROOTVOLUME		151	/* AFS Get root volume name */
+#define FSLOOKUP		161	/* AFS lookup file in directory */
+
+/*****************************************************************************/
+/*
+ * map afs abort codes to/from Linux error codes
+ * - called with call->lock held
+ */
+static void afs_rxfs_aemap(struct rxrpc_call *call)
+{
+	int state = call->app_err_state;
+
+	if (state == RXRPC_ESTATE_LOCAL_ABORT) {
+		/* aborted locally: the abort code is the negated app_errno */
+		call->app_abort_code = -call->app_errno;
+	}
+	else if (state == RXRPC_ESTATE_PEER_ABORT) {
+		/* aborted by the peer: convert its abort code into app_errno */
+		call->app_errno = afs_abort_to_error(call->app_abort_code);
+	}
+} /* end afs_rxfs_aemap() */
+
+/*****************************************************************************/
+/*
+ * get the root volume name from a fileserver
+ * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
+ */
+#if 0 /* compiled out */
+int afs_rxfs_get_root_volume(afs_server_t *server, char *buf, size_t *buflen)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[2];
+	size_t sent;
+	int ret;
+	u32 param[1];
+
+	kenter("%p,%p,%u",server,buf,*buflen); /* NOTE(review): %u is handed a size_t */
+
+	/* get hold of the fileserver connection */
+	ret = afs_server_get_fsconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSGETROOTVOLUME;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: the request is just the opcode word */
+	param[0] = htonl(FSGETROOTVOLUME);
+
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive: sleep until the call leaves RCV_REPLY or a signal is pending */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (call->app_call_state!=RXRPC_CSTATE_CLNT_RCV_REPLY ||
+		    signal_pending(current))
+			break;
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	ret = -EINTR;
+	if (signal_pending(current))
+		goto abort;
+
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_ERROR:
+		ret = call->app_errno;
+		kdebug("Got Error: %d",ret);
+		goto out_unwait;
+
+	case RXRPC_CSTATE_CLNT_GOT_REPLY:
+		/* read the reply - NOTE(review): it is read with a NULL buffer, and buf / *buflen are never written */
+		kdebug("Got Reply: qty=%d",call->app_ready_qty);
+
+		ret = -EBADMSG;
+		if (call->app_ready_qty <= 4)
+			goto abort;
+
+		ret = rxrpc_call_read_data(call,NULL,call->app_ready_qty,0);
+		if (ret<0)
+			goto abort;
+
+#if 0 /* NOTE(review): uses entry, bp, buffer and loop, none of which are declared in this function */
+		/* unmarshall the reply */
+		bp = buffer;
+		for (loop=0; loop<65; loop++)
+			entry->name[loop] = ntohl(*bp++);
+		entry->name[64] = 0;
+
+		entry->type = ntohl(*bp++);
+		entry->num_servers = ntohl(*bp++);
+
+		for (loop=0; loop<8; loop++)
+			entry->servers[loop].addr.s_addr = *bp++;
+
+		for (loop=0; loop<8; loop++)
+			entry->servers[loop].partition = ntohl(*bp++);
+
+		for (loop=0; loop<8; loop++)
+			entry->servers[loop].flags = ntohl(*bp++);
+
+		for (loop=0; loop<3; loop++)
+			entry->volume_ids[loop] = ntohl(*bp++);
+
+		entry->clone_id = ntohl(*bp++);
+		entry->flags = ntohl(*bp);
+#endif
+
+		/* success */
+		ret = 0;
+		goto out_unwait;
+
+	default:
+		BUG();
+	}
+
+ abort:
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_fsconn(server,conn);
+ out:
+	kleave("");
+	return ret;
+} /* end afs_rxfs_get_root_volume() */
+#endif
+
+/*****************************************************************************/
+/*
+ * get information about a volume
+ */
+#if 0 /* compiled out */
+int afs_rxfs_get_volume_info(afs_server_t *server,
+			     const char *name,
+			     afs_volume_info_t *vinfo)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[3];
+	size_t sent;
+	int ret;
+	u32 param[2], *bp, zero;
+
+	_enter("%p,%s,%p",server,name,vinfo);
+
+	/* get hold of the fileserver connection */
+	ret = afs_server_get_fsconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSGETVOLUMEINFO;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: {opcode, name length}, the name itself, then zero padding to a 4-byte boundary */
+	piov[1].iov_len = strlen(name);
+	piov[1].iov_base = (char*)name;
+
+	zero = 0;
+	piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
+	piov[2].iov_base = &zero;
+
+	param[0] = htonl(FSGETVOLUMEINFO);
+	param[1] = htonl(piov[1].iov_len);
+
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,3,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive (64 bytes = the 16 words unmarshalled below) */
+	bp = rxrpc_call_alloc_scratch(call,64);
+
+	ret = rxrpc_call_read_data(call,bp,64,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0) {
+		if (ret==-ECONNABORTED) {
+			ret = call->app_errno;
+			goto out_unwait;
+		}
+		goto abort;
+	}
+
+	/* unmarshall the reply (server addresses are copied as-is, without ntohl) */
+	vinfo->vid = ntohl(*bp++);
+	vinfo->type = ntohl(*bp++);
+
+	vinfo->type_vids[0] = ntohl(*bp++);
+	vinfo->type_vids[1] = ntohl(*bp++);
+	vinfo->type_vids[2] = ntohl(*bp++);
+	vinfo->type_vids[3] = ntohl(*bp++);
+	vinfo->type_vids[4] = ntohl(*bp++);
+
+	vinfo->nservers = ntohl(*bp++);
+	vinfo->servers[0].addr.s_addr = *bp++;
+	vinfo->servers[1].addr.s_addr = *bp++;
+	vinfo->servers[2].addr.s_addr = *bp++;
+	vinfo->servers[3].addr.s_addr = *bp++;
+	vinfo->servers[4].addr.s_addr = *bp++;
+	vinfo->servers[5].addr.s_addr = *bp++;
+	vinfo->servers[6].addr.s_addr = *bp++;
+	vinfo->servers[7].addr.s_addr = *bp++;
+
+	ret = -EBADMSG;
+	if (vinfo->nservers>8) /* only eight server slots are unmarshalled above */
+		goto abort;
+
+	/* success */
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_fsconn(server,conn);
+ out:
+	_leave("");
+	return ret;
+
+ abort:
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+
+} /* end afs_rxfs_get_volume_info() */
+#endif
+
+/*****************************************************************************/
+/*
+ * fetch the status information for a file into vnode->status and vnode->cb_* (and volsync->creation if volsync is given)
+ */
+int afs_rxfs_fetch_file_status(afs_server_t *server,
+			       afs_vnode_t *vnode,
+			       afs_volsync_t *volsync)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct afs_server_callslot callslot;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	size_t sent;
+	int ret;
+	u32 *bp;
+
+	_enter("%p,{%u,%u,%u}",server,vnode->fid.vid,vnode->fid.vnode,vnode->fid.unique);
+
+	/* get hold of a call slot on the server (its conn member is used to make the call) */
+	ret = afs_server_request_callslot(server,&callslot);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(callslot.conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSFETCHSTATUS;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: the opcode followed by the three words of the fid */
+	bp = rxrpc_call_alloc_scratch(call,16);
+	bp[0] = htonl(FSFETCHSTATUS);
+	bp[1] = htonl(vnode->fid.vid);
+	bp[2] = htonl(vnode->fid.vnode);
+	bp[3] = htonl(vnode->fid.unique);
+
+	piov[0].iov_len = 16;
+	piov[0].iov_base = bp;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive (120 bytes: 21 status + 3 callback + 6 volsync words) */
+	bp = rxrpc_call_alloc_scratch(call,120);
+
+	ret = rxrpc_call_read_data(call,bp,120,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0) {
+		if (ret==-ECONNABORTED) {
+			ret = call->app_errno;	/* report the errno recorded on the call; no abort of our own */
+			goto out_unwait;
+		}
+		goto abort;
+	}
+
+	/* unmarshall the reply */
+	vnode->status.if_version	= ntohl(*bp++);
+	vnode->status.type		= ntohl(*bp++);
+	vnode->status.nlink		= ntohl(*bp++);
+	vnode->status.size		= ntohl(*bp++);
+	vnode->status.version		= ntohl(*bp++);
+	vnode->status.author		= ntohl(*bp++);
+	vnode->status.owner		= ntohl(*bp++);
+	vnode->status.caller_access	= ntohl(*bp++);
+	vnode->status.anon_access	= ntohl(*bp++);
+	vnode->status.mode		= ntohl(*bp++);
+	vnode->status.parent.vid	= vnode->fid.vid; /* not taken from the reply */
+	vnode->status.parent.vnode	= ntohl(*bp++);
+	vnode->status.parent.unique	= ntohl(*bp++);
+	bp++; /* seg size */
+	vnode->status.mtime_client	= ntohl(*bp++);
+	vnode->status.mtime_server	= ntohl(*bp++);
+	bp++; /* group */
+	bp++; /* sync counter */
+	vnode->status.version		|= ((unsigned long long) ntohl(*bp++)) << 32; /* upper 32 bits of version */
+	bp++; /* spare2 */
+	bp++; /* spare3 */
+	bp++; /* spare4 */
+
+	vnode->cb_version		= ntohl(*bp++);
+	vnode->cb_expiry		= ntohl(*bp++);
+	vnode->cb_type			= ntohl(*bp++);
+
+	if (volsync) {
+		volsync->creation	= ntohl(*bp++);
+		bp++; /* spare2 */
+		bp++; /* spare3 */
+		bp++; /* spare4 */
+		bp++; /* spare5 */
+		bp++; /* spare6 */
+	}
+
+	/* success */
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_callslot(server,&callslot);
+ out:
+	_leave("");
+	return ret;
+
+ abort:	/* abort the call with ret as the code, call schedule() once, then clean up */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+} /* end afs_rxfs_fetch_file_status() */
+
+/*****************************************************************************/
+/*
+ * fetch desc->size bytes at desc->offset into desc->buffer, then refresh vnode->status and vnode->cb_* from the reply
+ */
+int afs_rxfs_fetch_file_data(afs_server_t *server,
+			     afs_vnode_t *vnode,
+			     struct afs_rxfs_fetch_descriptor *desc,
+			     afs_volsync_t *volsync)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct afs_server_callslot callslot;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	size_t sent;
+	int ret;
+	u32 *bp;
+
+	_enter("%p,{fid={%u,%u,%u},sz=%u,of=%lu}",
+	       server,
+	       desc->fid.vid,
+	       desc->fid.vnode,
+	       desc->fid.unique,
+	       (unsigned) desc->size,		/* size_t: cast to match %u */
+	       (unsigned long) desc->offset);	/* off_t: cast to match %lu */
+
+	/* get hold of a call slot on the server (its conn member is used to make the call) */
+	ret = afs_server_request_callslot(server,&callslot);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(callslot.conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSFETCHDATA;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: opcode, the three fid words, then offset and size */
+	bp = rxrpc_call_alloc_scratch(call,24);
+	bp[0] = htonl(FSFETCHDATA);
+	bp[1] = htonl(desc->fid.vid);
+	bp[2] = htonl(desc->fid.vnode);
+	bp[3] = htonl(desc->fid.unique);
+	bp[4] = htonl(desc->offset);	/* NOTE(review): sent as a single 32-bit word */
+	bp[5] = htonl(desc->size);
+
+	piov[0].iov_len = 24;
+	piov[0].iov_base = bp;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the data count to arrive (one word, read into the start of the scratch buffer) */
+	ret = rxrpc_call_read_data(call,bp,4,RXRPC_CALL_READ_BLOCK);
+	if (ret<0)
+		goto read_failed;
+
+	desc->actual = ntohl(bp[0]);
+	if (desc->actual!=desc->size) {
+		ret = -EBADMSG;	/* anything other than exactly desc->size bytes is rejected */
+		goto abort;
+	}
+
+	/* call the app to read the actual data */
+	rxrpc_call_reset_scratch(call);
+
+	ret = rxrpc_call_read_data(call,desc->buffer,desc->actual,RXRPC_CALL_READ_BLOCK);
+	if (ret<0)
+		goto read_failed;
+
+	/* wait for the rest of the reply to completely arrive (120 bytes: 21 status + 3 callback + 6 volsync words) */
+	rxrpc_call_reset_scratch(call);
+	bp = rxrpc_call_alloc_scratch(call,120);
+
+	ret = rxrpc_call_read_data(call,bp,120,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0)
+		goto read_failed;
+
+	/* unmarshall the reply */
+	vnode->status.if_version	= ntohl(*bp++);
+	vnode->status.type		= ntohl(*bp++);
+	vnode->status.nlink		= ntohl(*bp++);
+	vnode->status.size		= ntohl(*bp++);
+	vnode->status.version		= ntohl(*bp++);
+	vnode->status.author		= ntohl(*bp++);
+	vnode->status.owner		= ntohl(*bp++);
+	vnode->status.caller_access	= ntohl(*bp++);
+	vnode->status.anon_access	= ntohl(*bp++);
+	vnode->status.mode		= ntohl(*bp++);
+	vnode->status.parent.vid	= desc->fid.vid; /* not taken from the reply */
+	vnode->status.parent.vnode	= ntohl(*bp++);
+	vnode->status.parent.unique	= ntohl(*bp++);
+	bp++; /* seg size */
+	vnode->status.mtime_client	= ntohl(*bp++);
+	vnode->status.mtime_server	= ntohl(*bp++);
+	bp++; /* group */
+	bp++; /* sync counter */
+	vnode->status.version		|= ((unsigned long long) ntohl(*bp++)) << 32; /* upper 32 bits of version */
+	bp++; /* spare2 */
+	bp++; /* spare3 */
+	bp++; /* spare4 */
+
+	vnode->cb_version		= ntohl(*bp++);
+	vnode->cb_expiry		= ntohl(*bp++);
+	vnode->cb_type			= ntohl(*bp++);
+
+	if (volsync) {
+		volsync->creation	= ntohl(*bp++);
+		bp++; /* spare2 */
+		bp++; /* spare3 */
+		bp++; /* spare4 */
+		bp++; /* spare5 */
+		bp++; /* spare6 */
+	}
+
+	/* success */
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_callslot(server,&callslot);
+ out:
+	_leave(" = %d",ret);
+	return ret;
+
+ read_failed:	/* -ECONNABORTED: return the errno recorded on the call; anything else falls into abort */
+	if (ret==-ECONNABORTED) {
+		ret = call->app_errno;
+		goto out_unwait;
+	}
+
+ abort:	/* abort the call with ret as the code, call schedule() once, then clean up */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+
+} /* end afs_rxfs_fetch_file_data() */
+
+/*****************************************************************************/
+/*
+ * ask the AFS fileserver to discard a callback request on a file (sends the vnode's fid and its cb_* values)
+ */
+int afs_rxfs_give_up_callback(afs_server_t *server, afs_vnode_t *vnode)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct afs_server_callslot callslot;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	size_t sent;
+	int ret;
+	u32 *bp;
+
+	_enter("%p,{%u,%u,%u}",server,vnode->fid.vid,vnode->fid.vnode,vnode->fid.unique);
+
+	/* get hold of a call slot on the server (its conn member is used to make the call) */
+	ret = afs_server_request_callslot(server,&callslot);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(callslot.conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSGIVEUPCALLBACKS;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: 9 words = opcode, {count, 3 fid words}, {count, 3 callback words} */
+	bp = rxrpc_call_alloc_scratch(call,(1+4+4)*4);
+
+	piov[0].iov_len = (1+4+4)*4;
+	piov[0].iov_base = bp;
+
+	*bp++ = htonl(FSGIVEUPCALLBACKS);
+	*bp++ = htonl(1); /* one fid follows */
+	*bp++ = htonl(vnode->fid.vid);
+	*bp++ = htonl(vnode->fid.vnode);
+	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(1); /* one callback follows */
+	*bp++ = htonl(vnode->cb_version);
+	*bp++ = htonl(vnode->cb_expiry);
+	*bp++ = htonl(vnode->cb_type);
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive: sleep until the call leaves RCV_REPLY or a signal is pending */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (call->app_call_state!=RXRPC_CSTATE_CLNT_RCV_REPLY ||
+		    signal_pending(current))
+			break;
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	ret = -EINTR;
+	if (signal_pending(current))
+		goto abort;
+
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_ERROR:
+		ret = call->app_errno;
+		goto out_unwait;
+
+	case RXRPC_CSTATE_CLNT_GOT_REPLY:
+		ret = 0; /* the reply contents are not read */
+		goto out_unwait;
+
+	default:
+		BUG();
+	}
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_callslot(server,&callslot);
+ out:
+	_leave("");
+	return ret;
+
+ abort:	/* abort the call with ret as the code, call schedule() once, then clean up */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+} /* end afs_rxfs_give_up_callback() */
+
+/*****************************************************************************/
+/*
+ * look a filename up in a directory
+ * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
+ */
+#if 0 /* compiled out; NOTE(review): the body uses fid, dirfid and callback, none of which are declared in this function */
+int afs_rxfs_lookup(afs_server_t *server,
+		    afs_vnode_t *dir,
+		    const char *filename,
+		    afs_vnode_t *vnode,
+		    afs_volsync_t *volsync)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[3];
+	size_t sent;
+	int ret;
+	u32 *bp, zero;
+
+	kenter("%p,{%u,%u,%u},%s",server,fid->vid,fid->vnode,fid->unique,filename);
+
+	/* get hold of the fileserver connection */
+	ret = afs_server_get_fsconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxfs_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = FSLOOKUP;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters: {opcode, directory fid, name length}, the name, then zero padding to 4 bytes */
+	bp = rxrpc_call_alloc_scratch(call,20);
+
+	zero = 0;
+
+	piov[0].iov_len = 20;
+	piov[0].iov_base = bp;
+	piov[1].iov_len = strlen(filename);
+	piov[1].iov_base = (char*) filename;
+	piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
+	piov[2].iov_base = &zero;
+
+	*bp++ = htonl(FSLOOKUP);
+	*bp++ = htonl(dirfid->vid);
+	*bp++ = htonl(dirfid->vnode);
+	*bp++ = htonl(dirfid->unique);
+	*bp++ = htonl(piov[1].iov_len);
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,3,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive - NOTE(review): 220 bytes are read but the fields below total 54 words (216 bytes); confirm */
+	bp = rxrpc_call_alloc_scratch(call,220);
+
+	ret = rxrpc_call_read_data(call,bp,220,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0) {
+		if (ret==-ECONNABORTED) {
+			ret = call->app_errno;
+			goto out_unwait;
+		}
+		goto abort;
+	}
+
+	/* unmarshall the reply: the fid, the file's status, the directory's status, the callback, then volsync */
+	fid->vid		= ntohl(*bp++);
+	fid->vnode		= ntohl(*bp++);
+	fid->unique		= ntohl(*bp++);
+
+	vnode->status.if_version	= ntohl(*bp++);
+	vnode->status.type		= ntohl(*bp++);
+	vnode->status.nlink		= ntohl(*bp++);
+	vnode->status.size		= ntohl(*bp++);
+	vnode->status.version		= ntohl(*bp++);
+	vnode->status.author		= ntohl(*bp++);
+	vnode->status.owner		= ntohl(*bp++);
+	vnode->status.caller_access	= ntohl(*bp++);
+	vnode->status.anon_access	= ntohl(*bp++);
+	vnode->status.mode		= ntohl(*bp++);
+	vnode->status.parent.vid	= dirfid->vid;
+	vnode->status.parent.vnode	= ntohl(*bp++);
+	vnode->status.parent.unique	= ntohl(*bp++);
+	bp++; /* seg size */
+	vnode->status.mtime_client	= ntohl(*bp++);
+	vnode->status.mtime_server	= ntohl(*bp++);
+	bp++; /* group */
+	bp++; /* sync counter */
+	vnode->status.version		|= ((unsigned long long) ntohl(*bp++)) << 32;
+	bp++; /* spare2 */
+	bp++; /* spare3 */
+	bp++; /* spare4 */
+
+	dir->status.if_version		= ntohl(*bp++);
+	dir->status.type			= ntohl(*bp++);
+	dir->status.nlink		= ntohl(*bp++);
+	dir->status.size			= ntohl(*bp++);
+	dir->status.version		= ntohl(*bp++);
+	dir->status.author		= ntohl(*bp++);
+	dir->status.owner		= ntohl(*bp++);
+	dir->status.caller_access	= ntohl(*bp++);
+	dir->status.anon_access		= ntohl(*bp++);
+	dir->status.mode			= ntohl(*bp++);
+	dir->status.parent.vid		= dirfid->vid;
+	dir->status.parent.vnode		= ntohl(*bp++);
+	dir->status.parent.unique	= ntohl(*bp++);
+	bp++; /* seg size */
+	dir->status.mtime_client		= ntohl(*bp++);
+	dir->status.mtime_server		= ntohl(*bp++);
+	bp++; /* group */
+	bp++; /* sync counter */
+	dir->status.version		|= ((unsigned long long) ntohl(*bp++)) << 32;
+	bp++; /* spare2 */
+	bp++; /* spare3 */
+	bp++; /* spare4 */
+
+	callback->fid		= *fid;
+	callback->version	= ntohl(*bp++);
+	callback->expiry	= ntohl(*bp++);
+	callback->type		= ntohl(*bp++);
+
+	if (volsync) {
+		volsync->creation	= ntohl(*bp++);
+		bp++; /* spare2 */
+		bp++; /* spare3 */
+		bp++; /* spare4 */
+		bp++; /* spare5 */
+		bp++; /* spare6 */
+	}
+
+	/* success */
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	afs_server_release_fsconn(server,conn);
+ out:
+	kleave("");
+	return ret;
+
+ abort:
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+} /* end afs_rxfs_lookup() */
+#endif
diff --git a/fs/afs/fsclient.h b/fs/afs/fsclient.h
new file mode 100644
index 000000000000..0931a5b1be8f
--- /dev/null
+++ b/fs/afs/fsclient.h
@@ -0,0 +1,53 @@
+/* fsclient.h: AFS File Server client stub declarations
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_FSCLIENT_H
+#define _LINUX_AFS_FSCLIENT_H
+
+#include "server.h"
+
+extern int afs_rxfs_get_volume_info(afs_server_t *server,	/* NOTE: the definition in fsclient.c is under #if 0 */
+				    const char *name,
+				    afs_volume_info_t *vinfo);
+
+extern int afs_rxfs_fetch_file_status(afs_server_t *server,	/* fills in vnode->status and vnode->cb_* */
+				      afs_vnode_t *vnode,
+				      afs_volsync_t *volsync);	/* may be NULL */
+
+struct afs_rxfs_fetch_descriptor {	/* request/result block for afs_rxfs_fetch_file_data() */
+	afs_fid_t	fid;		/* file ID to fetch */
+	size_t		size;		/* total number of bytes to fetch */
+	off_t		offset;		/* offset in file to start from */
+	void		*buffer;	/* read buffer (receives 'actual' bytes, which must equal 'size') */
+	size_t		actual;		/* actual size sent back by server (fetch gives -EBADMSG if != size) */
+};
+
+extern int afs_rxfs_fetch_file_data(afs_server_t *server,	/* also refreshes vnode->status and vnode->cb_* */
+				    afs_vnode_t *vnode,
+				    struct afs_rxfs_fetch_descriptor *desc,
+				    afs_volsync_t *volsync);	/* may be NULL */
+
+extern int afs_rxfs_give_up_callback(afs_server_t *server, afs_vnode_t *vnode);
+
+/* this doesn't appear to work in OpenAFS server; the definition in fsclient.c is under #if 0 */
+extern int afs_rxfs_lookup(afs_server_t *server,
+			   afs_vnode_t *dir,
+			   const char *filename,
+			   afs_vnode_t *vnode,
+			   afs_volsync_t *volsync);
+
+/* this is apparently mis-implemented in OpenAFS server; the definition in fsclient.c is under #if 0 */
+extern int afs_rxfs_get_root_volume(afs_server_t *server,
+				    char *buf,
+				    size_t *buflen);
+
+
+#endif /* _LINUX_AFS_FSCLIENT_H */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
new file mode 100644
index 000000000000..235b7b0bcf5e
--- /dev/null
+++ b/fs/afs/inode.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
+ *          David Howells <dhowells@redhat.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "vnode.h"
+#include "super.h"
+#include "internal.h"
+
+struct afs_iget_data {	/* cookie afs_iget() passes to the iget4()/iget5_locked() callbacks */
+	afs_fid_t		fid;		/* file ID (vid, vnode, unique) being looked up */
+	afs_volume_t		*volume;	/* volume on which the file resides */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)	/* NOTE(review): '<' here, but afs_iget()'s fallback branch covers '<= 2.5.0' */
+	afs_vnode_t		*new_vnode;	/* pre-allocated vnode record, consumed by afs_read_inode2() */
+#endif
+};
+
+/*****************************************************************************/
+/*
+ * map the AFS file status to the inode member variables
+ */
+static int afs_inode_map_status(afs_vnode_t *vnode)
+{
+	struct inode *ino = AFS_VNODE_TO_I(vnode);
+	/* returns 0, or -EBADMSG (leaving the inode untouched) for an unknown file type */
+	_debug("FS: ft=%d lk=%d sz=%u ver=%Lu mod=%hu",
+	       vnode->status.type,
+	       vnode->status.nlink,
+	       vnode->status.size,
+	       vnode->status.version,
+	       vnode->status.mode);
+
+	/* the AFS file type selects the mode bits and the operation tables */
+	switch (vnode->status.type) {
+	case AFS_FTYPE_FILE:
+		ino->i_fop	= &afs_file_file_operations;
+		ino->i_op	= &afs_file_inode_operations;
+		ino->i_mode	= S_IFREG | vnode->status.mode;
+		break;
+	case AFS_FTYPE_DIR:
+		ino->i_fop	= &afs_dir_file_operations;
+		ino->i_op	= &afs_dir_inode_operations;
+		ino->i_mode	= S_IFDIR | vnode->status.mode;
+		break;
+	case AFS_FTYPE_SYMLINK:
+		ino->i_op	= &page_symlink_inode_operations;
+		ino->i_mode	= S_IFLNK | vnode->status.mode;
+		break;
+	default:
+		printk("kAFS: AFS vnode with undefined type\n");
+		return -EBADMSG;
+	}
+
+	/* attributes filled in the same way whatever the type is */
+	ino->i_size		= vnode->status.size;
+	ino->i_nlink		= vnode->status.nlink;
+	ino->i_uid		= vnode->status.owner;
+	ino->i_gid		= 0;
+	ino->i_rdev		= NODEV;
+	ino->i_blocks		= 0;
+	ino->i_blksize		= PAGE_CACHE_SIZE;
+	ino->i_version		= vnode->fid.unique;
+	ino->i_atime		= ino->i_mtime = ino->i_ctime = vnode->status.mtime_server;
+	ino->i_mapping->a_ops	= &afs_fs_aops;
+
+	/* a symbolic link may really be a mountpoint, which is presented as a directory */
+	if (vnode->status.type != AFS_FTYPE_SYMLINK)
+		return 0;
+	afs_mntpt_check_symlink(vnode);
+	if (!(vnode->flags & AFS_VNODE_MOUNTPOINT))
+		return 0;
+	ino->i_mode	= S_IFDIR | vnode->status.mode;
+	ino->i_op	= &afs_mntpt_inode_operations;
+	ino->i_fop	= &afs_mntpt_file_operations;
+	return 0;
+} /* end afs_inode_map_status() */
+
+/*****************************************************************************/
+/*
+ * attempt to fetch the status of an inode, coalescing multiple simultaneous fetches
+ */
+int afs_inode_fetch_status(struct inode *inode)
+{
+	afs_vnode_t *vn = AFS_FS_I(inode);
+	int err;
+
+	/* bring the vnode's status record up to date first ... */
+	err = afs_vnode_fetch_status(vn);
+	if (err != 0)
+		return err;	/* pass the fetch result straight back */
+
+	/* ... then copy the fresh status into the VFS inode */
+	err = afs_inode_map_status(vn);
+	return err;
+
+} /* end afs_inode_fetch_status() */
+
+/*****************************************************************************/
+/*
+ * iget5() comparator
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static int afs_iget5_test(struct inode *inode, void *opaque)
+{
+	struct afs_iget_data *want = opaque;
+	int same_vnode = (inode->i_ino == want->fid.vnode);
+	/* a hit needs the same vnode number and the same uniquifier (kept in i_version) */
+	return same_vnode && inode->i_version == want->fid.unique;
+} /* end afs_iget5_test() */
+#endif
+
+/*****************************************************************************/
+/*
+ * iget5() inode initialiser
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static int afs_iget5_set(struct inode *inode, void *opaque)
+{
+	struct afs_iget_data *src = opaque;
+	afs_vnode_t *vn = AFS_FS_I(inode);
+	/* record the identity on the AFS side ... */
+	vn->volume = src->volume;
+	vn->fid = src->fid;
+	/* ... and mirror the two keys that afs_iget5_test() compares */
+	inode->i_version = src->fid.unique;
+	inode->i_ino = src->fid.vnode;
+	return 0;	/* always succeeds */
+} /* end afs_iget5_set() */
+#endif
+
+/*****************************************************************************/
+/*
+ * iget4() comparator
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+static int afs_iget4_test(struct inode *inode, ino_t ino, void *opaque)
+{
+	struct afs_iget_data *want = opaque;
+	int same_vnode = (inode->i_ino == want->fid.vnode);
+	/* 'ino' is not consulted: the vnode number and uniquifier in the cookie decide */
+	return same_vnode && inode->i_version == want->fid.unique;
+} /* end afs_iget4_test() */
+#endif
+
+/*****************************************************************************/
+/*
+ * read an inode (2.4 only)
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+void afs_read_inode2(struct inode *inode, void *opaque)
+{
+	struct afs_iget_data *data = opaque;	/* cookie built by afs_iget() */
+	afs_vnode_t *vnode;
+	int ret;
+
+	_enter(",{{%u,%u,%u},%p}",data->fid.vid,data->fid.vnode,data->fid.unique,data->volume);
+
+	if (inode->u.generic_ip) BUG();	/* the inode must not already carry private data */
+
+	/* attach a pre-allocated vnode record */
+	inode->u.generic_ip = vnode = data->new_vnode;
+	data->new_vnode = NULL;	/* ownership moved to the inode, so afs_iget() will not kfree() it */
+
+	memset(vnode,0,sizeof(*vnode));	/* the record came from kmalloc() in afs_iget(), so clear it first */
+	vnode->inode = inode;
+	init_waitqueue_head(&vnode->update_waitq);
+	spin_lock_init(&vnode->lock);
+	INIT_LIST_HEAD(&vnode->cb_link);
+	INIT_LIST_HEAD(&vnode->cb_hash_link);
+	afs_timer_init(&vnode->cb_timeout,&afs_vnode_cb_timed_out_ops);
+	vnode->flags |= AFS_VNODE_CHANGED;	/* NOTE(review): presumably forces the fetch below to go to the server - confirm in vnode.c */
+	vnode->volume = data->volume;
+	vnode->fid = data->fid;
+
+	/* ask the server for a status check */
+	ret = afs_vnode_fetch_status(vnode);
+	if (ret<0) {
+		make_bad_inode(inode);	/* no way to return an error from here, so poison the inode instead */
+		_leave(" [bad inode]");
+		return;
+	}
+	/* translate the fetched status into inode fields; an unknown file type also poisons the inode */
+	ret = afs_inode_map_status(vnode);
+	if (ret<0) {
+		make_bad_inode(inode);
+		_leave(" [bad inode]");
+		return;
+	}
+
+	_leave("");
+	return;
+} /* end afs_read_inode2() */
+#endif
+
+/*****************************************************************************/
+/*
+ * inode retrieval
+ */
+inline int afs_iget(struct super_block *sb, afs_fid_t *fid, struct inode **_inode)
+{
+	struct afs_iget_data data = { fid: *fid };	/* cookie for the iget callbacks; volume is filled in below */
+	struct afs_super_info *as;
+	struct inode *inode;
+	afs_vnode_t *vnode;
+	int ret;
+	/* on success stores the inode in *_inode and returns 0; otherwise returns a negative error and leaves *_inode alone */
+	_enter(",{%u,%u,%u},,",fid->vid,fid->vnode,fid->unique);
+
+	as = sb->s_fs_info;
+	data.volume = as->volume;
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	inode = iget5_locked(sb,fid->vnode,afs_iget5_test,afs_iget5_set,&data);
+	if (!inode) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	vnode = AFS_FS_I(inode);
+
+	/* deal with an existing inode */
+	if (!(inode->i_state & I_NEW)) {
+		ret = afs_vnode_fetch_status(vnode);	/* vnode status only: afs_inode_map_status() is not re-run on this path */
+		if (ret==0)
+			*_inode = inode;
+		else
+			iput(inode);	/* drop the reference taken by iget5_locked() */
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	/* okay... it's a new inode */
+	vnode->flags |= AFS_VNODE_CHANGED;	/* NOTE(review): presumably forces a real fetch - confirm in vnode.c */
+	ret = afs_inode_fetch_status(inode);	/* fetches the status and maps it onto the inode */
+	if (ret<0)
+		goto bad_inode;
+
+#if 0
+	/* find a cache entry for it */
+	ret = afs_cache_lookup_vnode(as->volume,vnode);
+	if (ret<0)
+		goto bad_inode;
+#endif
+
+	/* success */
+	unlock_new_inode(inode);
+
+	*_inode = inode;
+	_leave(" = 0 [CB { v=%u x=%lu t=%u nix=%u }]",
+	       vnode->cb_version,
+	       vnode->cb_timeout.timo_jif,
+	       vnode->cb_type,
+	       vnode->nix
+	       );
+	return 0;
+
+	/* failure */
+ bad_inode:
+	make_bad_inode(inode);	/* poison it before unlocking so that waiters see a bad inode */
+	unlock_new_inode(inode);
+	iput(inode);
+
+	_leave(" = %d [bad]",ret);
+	return ret;
+
+#else
+
+	/* pre-allocate a vnode record so that afs_read_inode2() doesn't have to return an inode
+	 * without one attached
+	 */
+	data.new_vnode = kmalloc(sizeof(afs_vnode_t),GFP_KERNEL);
+	if (!data.new_vnode) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	inode = iget4(sb,fid->vnode,afs_iget4_test,&data);
+	if (data.new_vnode) kfree(data.new_vnode);	/* still set => afs_read_inode2() did not take the record */
+	if (!inode) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+	/* NOTE(review): an inode that afs_read_inode2() marked bad is still returned with 0 here - confirm callers cope */
+	vnode = AFS_FS_I(inode);
+	*_inode = inode;
+	_leave(" = 0 [CB { v=%u x=%lu t=%u nix=%u }]",
+	       vnode->cb_version,
+	       vnode->cb_timeout.timo_jif,
+	       vnode->cb_type,
+	       vnode->nix
+	       );
+	return 0;
+#endif
+} /* end afs_iget() */
+
+/*****************************************************************************/
+/*
+ * read the attributes of an inode
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	/* returns 0 with *stat filled in, -ENOENT with the inode left alone, or
+	 * another negative error after the inode has been marked bad */
+	struct inode *ino = dentry->d_inode;
+	afs_vnode_t *vn = AFS_FS_I(ino);
+	int err;
+
+	_enter("{ ino=%lu v=%lu }",ino->i_ino,ino->i_version);
+
+	/* bring the inode up to date before reporting on it */
+	err = afs_inode_fetch_status(ino);
+	if (err < 0) {
+		/* -ENOENT is passed back as it is; any other failure poisons the inode */
+		if (err == -ENOENT) {
+			_leave(" = %d [%d %p]",err,atomic_read(&dentry->d_count),dentry->d_inode);
+			return err;
+		}
+		make_bad_inode(ino);
+		_leave(" = %d",err);
+		return err;
+	}
+
+	/* copy the refreshed inode fields out to the caller's kstat */
+	generic_fillattr(ino,stat);
+
+	_leave(" = 0 CB { v=%u x=%u t=%u }",
+	       vn->cb_version,
+	       vn->cb_expiry,
+	       vn->cb_type);
+
+	return 0;
+} /* end afs_inode_getattr() */
+#endif
+
+/*****************************************************************************/
+/*
+ * revalidate the inode
+ */
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)
+int afs_inode_revalidate(struct dentry *dentry)
+{
+	/* returns 0 if the inode could be refreshed, -ENOENT with the inode left
+	 * alone, or another negative error after the inode has been marked bad */
+	struct inode *ino = dentry->d_inode;
+	afs_vnode_t *vn = AFS_FS_I(ino);
+	int err;
+
+	_enter("{ ino=%lu v=%lu }",ino->i_ino,ino->i_version);
+
+	/* refetch the status and map it onto the inode */
+	err = afs_inode_fetch_status(ino);
+	if (err < 0) {
+		/* -ENOENT is passed back as it is; any other failure poisons the inode */
+		if (err == -ENOENT) {
+			_leave(" = %d [%d %p]",err,atomic_read(&dentry->d_count),dentry->d_inode);
+			return err;
+		}
+		make_bad_inode(ino);
+		_leave(" = %d",err);
+		return err;
+	}
+
+	_leave(" = 0 CB { v=%u x=%u t=%u }",
+	       vn->cb_version,
+	       vn->cb_expiry,
+	       vn->cb_type);
+
+	return 0;
+} /* end afs_inode_revalidate() */
+#endif
+
+/*****************************************************************************/
+/*
+ * clear an AFS inode
+ */
+void afs_clear_inode(struct inode *inode)
+{
+	afs_vnode_t *vn = AFS_FS_I(inode);
+
+	_enter("ino=%lu { vn=%08x v=%u x=%u t=%u }",
+	       inode->i_ino,
+	       vn->fid.vnode,
+	       vn->cb_version,
+	       vn->cb_expiry,
+	       vn->cb_type
+	       );
+
+	/* the inode number must still equal the vnode number it was set up with */
+	if (inode->i_ino != vn->fid.vnode)
+		BUG();
+	afs_vnode_give_up_callback(vn);
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)
+	/* on this path the vnode record was kmalloc'd by afs_iget(), so free it here */
+	if (inode->u.generic_ip)
+		kfree(inode->u.generic_ip);
+#endif
+	_leave("");
+} /* end afs_clear_inode() */
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
new file mode 100644
index 000000000000..37f84bb11891
--- /dev/null
+++ b/fs/afs/internal.h
@@ -0,0 +1,127 @@
+/* internal.h: internal AFS stuff
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef AFS_INTERNAL_H
+#define AFS_INTERNAL_H
+
+#include <linux/version.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+
+/*
+ * debug tracing
+ */
+#define kenter(FMT,...)	printk("==> %s("FMT")\n",__FUNCTION__,##__VA_ARGS__)	/* function entry, args formatted by FMT */
+#define kleave(FMT,...)	printk("<== %s()"FMT"\n",__FUNCTION__,##__VA_ARGS__)	/* function exit, result formatted by FMT */
+#define kdebug(FMT,...)	printk(FMT"\n",##__VA_ARGS__)
+#define kproto(FMT,...)	printk("### "FMT"\n",##__VA_ARGS__)
+#define knet(FMT,...)	printk(FMT"\n",##__VA_ARGS__)
+/* the _xxx() forms are the ones used in the code: change the 0 to 1 to route them to the printk versions above */
+#if 0
+#define _enter(FMT,...)	kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...)	kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...)	kdebug(FMT,##__VA_ARGS__)
+#define _proto(FMT,...)	kproto(FMT,##__VA_ARGS__)
+#define _net(FMT,...)	knet(FMT,##__VA_ARGS__)
+#else	/* tracing off: the arguments are dropped entirely, so they are neither evaluated nor compiled */
+#define _enter(FMT,...)	do { } while(0)
+#define _leave(FMT,...)	do { } while(0)
+#define _debug(FMT,...)	do { } while(0)
+#define _proto(FMT,...)	do { } while(0)
+#define _net(FMT,...)	do { } while(0)
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)	/* shims so the newer names can be used on older kernels */
+#define wait_on_page_locked wait_on_page
+#define PageUptodate Page_Uptodate
+/* return the proc_dir_entry kept in an inode's generic_ip pointer */
+static inline struct proc_dir_entry *PDE(const struct inode *inode)
+{
+	return (struct proc_dir_entry *)inode->u.generic_ip;
+}
+#endif
+
+static inline void afs_discard_my_signals(void)
+{
+	siginfo_t discarded;	/* scratch space: what gets dequeued is thrown away */
+	/* keep dequeuing under the signal lock until nothing is pending for this task */
+	while (signal_pending(current)) {
+		spin_lock_irq(&current->sig->siglock);
+		dequeue_signal(&current->blocked,&discarded);
+		spin_unlock_irq(&current->sig->siglock);
+	}
+}
+
+/*
+ * cell.c
+ */
+extern struct rw_semaphore afs_proc_cells_sem;	/* NOTE(review): presumably guards afs_proc_cells - confirm in cell.c */
+extern struct list_head afs_proc_cells;
+
+/*
+ * dir.c
+ */
+extern struct inode_operations afs_dir_inode_operations;
+extern struct file_operations afs_dir_file_operations;
+
+/*
+ * file.c
+ */
+extern struct address_space_operations afs_fs_aops;
+extern struct inode_operations afs_file_inode_operations;
+extern struct file_operations afs_file_file_operations;
+
+/*
+ * inode.c
+ */
+extern int afs_iget(struct super_block *sb, afs_fid_t *fid, struct inode **_inode);	/* get the inode for a file ID; 0 or negative error */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+extern int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
+#else
+extern void afs_read_inode2(struct inode *inode, void *opaque);	/* opaque is the cookie built by afs_iget() */
+extern int afs_inode_revalidate(struct dentry *dentry);
+#endif
+extern void afs_clear_inode(struct inode *inode);
+
+/*
+ * mntpt.c
+ */
+extern struct inode_operations afs_mntpt_inode_operations;
+extern struct file_operations afs_mntpt_file_operations;
+
+extern int afs_mntpt_check_symlink(afs_vnode_t *vnode);	/* inode.c tests AFS_VNODE_MOUNTPOINT in vnode->flags after calling this */
+
+/*
+ * super.c
+ */
+extern int afs_fs_init(void);
+extern void afs_fs_exit(void);
+
+#define AFS_CB_HASH_COUNT (PAGE_SIZE/sizeof(struct list_head))	/* as many buckets as list heads fit in one page */
+
+extern struct list_head afs_cb_hash_tbl[];	/* defined in main.c with AFS_CB_HASH_COUNT entries */
+extern spinlock_t afs_cb_hash_lock;
+
+#define afs_cb_hash(SRV,FID) /* hash bucket for (server, file ID); the cast is unsigned long so a 64-bit pointer is not truncated */ \
+	afs_cb_hash_tbl[((unsigned long)(SRV) + (FID)->vid + (FID)->vnode + (FID)->unique) % \
+			AFS_CB_HASH_COUNT]
+
+/*
+ * proc.c
+ */
+extern int afs_proc_init(void);	/* called first thing by afs_init(); a negative return aborts module initialisation */
+extern void afs_proc_cleanup(void);
+extern int afs_proc_cell_setup(afs_cell_t *cell);	/* NOTE(review): presumably adds the per-cell /proc entries - confirm in proc.c */
+extern void afs_proc_cell_remove(afs_cell_t *cell);
+
+#endif /* AFS_INTERNAL_H */
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
new file mode 100644
index 000000000000..2891e98be91d
--- /dev/null
+++ b/fs/afs/kafsasyncd.c
@@ -0,0 +1,260 @@
+/* kafsasyncd.c: AFS asynchronous operation daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * The AFS async daemon is used to do the following:
+ * - probe "dead" servers to see whether they've come back to life yet.
+ * - probe "live" servers that we haven't talked to for a while to see if they are better
+ *   candidates for serving than what we're currently using
+ * - poll volume location servers to keep up to date volume location lists
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include "cell.h"
+#include "server.h"
+#include "volume.h"
+#include "kafsasyncd.h"
+#include "kafstimod.h"
+#include <rxrpc/call.h>
+#include <asm/errno.h>
+#include "internal.h"
+
+static DECLARE_COMPLETION(kafsasyncd_alive);	/* completed by the daemon once it is running */
+static DECLARE_COMPLETION(kafsasyncd_dead);	/* completed by the daemon as it exits */
+static DECLARE_WAIT_QUEUE_HEAD(kafsasyncd_sleepq);	/* where the daemon sleeps between jobs */
+static struct task_struct *kafsasyncd_task;	/* the daemon's task; NULL once it has begun to exit */
+static int kafsasyncd_die;	/* set by afs_kafsasyncd_stop() to ask the daemon to exit */
+
+static int kafsasyncd(void *arg);
+
+static LIST_HEAD(kafsasyncd_async_attnq);	/* operations waiting for the daemon to attend to them */
+static LIST_HEAD(kafsasyncd_async_busyq);	/* operations in progress that need no attention yet */
+static spinlock_t kafsasyncd_async_lock = SPIN_LOCK_UNLOCKED;	/* guards both queues */
+
+static void kafsasyncd_null_call_attn_func(struct rxrpc_call *call)
+{	/* deliberately empty: installed on each outstanding call while the daemon shuts down */
+}
+
+static void kafsasyncd_null_call_error_func(struct rxrpc_call *call)
+{	/* deliberately empty: installed on each outstanding call while the daemon shuts down */
+}
+
+/*****************************************************************************/
+/*
+ * start the async daemon
+ */
+int afs_kafsasyncd_start(void)
+{
+	int rc = kernel_thread(kafsasyncd,NULL,0);
+
+	/* a negative result is an error and there is nothing to wait for */
+	if (rc >= 0) {
+		/* block until the new thread has completed kafsasyncd_alive */
+		wait_for_completion(&kafsasyncd_alive);
+	}
+
+	return rc;
+} /* end afs_kafsasyncd_start() */
+
+/*****************************************************************************/
+/*
+ * stop the async daemon
+ */
+void afs_kafsasyncd_stop(void)
+{
+	/* get rid of my daemon */
+	kafsasyncd_die = 1;			/* checked by the daemon's wait loop */
+	wake_up(&kafsasyncd_sleepq);		/* wake it so that it notices the flag */
+	wait_for_completion(&kafsasyncd_dead);	/* completed by the daemon's complete_and_exit() */
+
+} /* end afs_kafsasyncd_stop() */
+
+/*****************************************************************************/
+/*
+ * probing daemon
+ */
+static int kafsasyncd(void *arg)
+{
+	DECLARE_WAITQUEUE(myself,current);
+	/* thread body: attends to queued operations one at a time until told to die, then aborts whatever is left */
+	struct list_head *_p;
+	int die;
+
+	kafsasyncd_task = current;
+
+	printk("kAFS: Started kafsasyncd %d\n",current->pid);
+	strcpy(current->comm,"kafsasyncd");
+
+	daemonize();
+	/* let afs_kafsasyncd_start() return */
+	complete(&kafsasyncd_alive);
+
+	/* block every signal: the mask is set to the inverse of the empty set */
+	spin_lock_irq(&current->sig->siglock);
+	siginitsetinv(&current->blocked,0);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,3)
+	recalc_sigpending();
+#else
+	recalc_sigpending(current);
+#endif
+	spin_unlock_irq(&current->sig->siglock);
+
+	/* loop around looking for things to attend to */
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&kafsasyncd_sleepq,&myself);
+		/* sleep until there is work, a signal or a request to die */
+		for (;;) {
+			if (!list_empty(&kafsasyncd_async_attnq) ||
+			    signal_pending(current) ||
+			    kafsasyncd_die)
+				break;
+
+			schedule();
+			set_current_state(TASK_INTERRUPTIBLE);
+		}
+
+		remove_wait_queue(&kafsasyncd_sleepq,&myself);
+		set_current_state(TASK_RUNNING);
+
+		/* discard pending signals */
+		afs_discard_my_signals();
+		/* sample the flag once so that this pass still attends to one operation before exiting */
+		die = kafsasyncd_die;
+
+		/* deal with the next asynchronous operation requiring attention */
+		if (!list_empty(&kafsasyncd_async_attnq)) {
+			struct afs_async_op *op;
+
+			_debug("@@@ Begin Asynchronous Operation");
+
+			op = NULL;
+			spin_lock(&kafsasyncd_async_lock);
+			/* recheck under the lock, then move the first op over to the busy queue */
+			if (!list_empty(&kafsasyncd_async_attnq)) {
+				op = list_entry(kafsasyncd_async_attnq.next,afs_async_op_t,link);
+				list_del(&op->link);
+				list_add_tail(&op->link,&kafsasyncd_async_busyq);
+			}
+
+			spin_unlock(&kafsasyncd_async_lock);
+
+			_debug("@@@ Operation %p {%p}\n",op,op?op->ops:NULL);
+
+			if (op)
+				op->ops->attend(op);
+
+			_debug("@@@ End Asynchronous Operation");
+		}
+
+	} while(!die);
+
+	/* need to kill all outstanding asynchronous operations before exiting */
+	kafsasyncd_task = NULL;
+	spin_lock(&kafsasyncd_async_lock);
+
+	/* fold the busy and attention queues together, then reset the emptied head (list_del_init() on it would re-link the moved entries into a ring) */
+	list_splice(&kafsasyncd_async_busyq,&kafsasyncd_async_attnq);
+	INIT_LIST_HEAD(&kafsasyncd_async_busyq);
+
+	/* dequeue kafsasyncd from all their wait queues */
+	list_for_each(_p,&kafsasyncd_async_attnq) {
+		afs_async_op_t *op = list_entry(_p,afs_async_op_t,link);
+
+		op->call->app_attn_func = kafsasyncd_null_call_attn_func;
+		op->call->app_error_func = kafsasyncd_null_call_error_func;
+		remove_wait_queue(&op->call->waitq,&op->waiter);
+	}
+
+	spin_unlock(&kafsasyncd_async_lock);
+
+	/* abort all the operations, always taking the first entry (_p is left pointing at the list head by the loop above) */
+	while (!list_empty(&kafsasyncd_async_attnq)) {
+		afs_async_op_t *op = list_entry(kafsasyncd_async_attnq.next,afs_async_op_t,link);
+		list_del_init(&op->link);
+
+		rxrpc_call_abort(op->call,-EIO);
+		rxrpc_put_call(op->call);
+		op->call = NULL;
+
+		op->ops->discard(op);
+	}
+
+	/* and that's all */
+	_leave("");
+	complete_and_exit(&kafsasyncd_dead,0);
+
+} /* end kafsasyncd() */
+
+/*****************************************************************************/
+/*
+ * begin an operation
+ * - place operation on busy queue
+ */
+void afs_kafsasyncd_begin_op(afs_async_op_t *op)
+{
+	_enter("");
+
+	spin_lock(&kafsasyncd_async_lock);
+	/* set up the wait queue entry so that it refers to the daemon's task */
+	init_waitqueue_entry(&op->waiter,kafsasyncd_task);
+
+	list_del(&op->link);	/* op->link must be initialised, e.g. by afs_async_op_init() */
+	list_add_tail(&op->link,&kafsasyncd_async_busyq);
+
+	spin_unlock(&kafsasyncd_async_lock);
+
+	_leave("");
+} /* end afs_kafsasyncd_begin_op() */
+
+/*****************************************************************************/
+/*
+ * request attention for an operation
+ * - move to attention queue
+ */
+void afs_kafsasyncd_attend_op(afs_async_op_t *op)
+{
+	_enter("");
+
+	spin_lock(&kafsasyncd_async_lock);
+	/* take it off whichever queue it is on and append it to the attention queue */
+	list_del(&op->link);
+	list_add_tail(&op->link,&kafsasyncd_async_attnq);
+
+	spin_unlock(&kafsasyncd_async_lock);
+	/* the daemon's wait loop exits when the attention queue is non-empty */
+	wake_up(&kafsasyncd_sleepq);
+
+	_leave("");
+} /* end afs_kafsasyncd_attend_op() */
+
+/*****************************************************************************/
+/*
+ * terminate an operation
+ * - remove from either queue
+ */
+void afs_kafsasyncd_terminate_op(afs_async_op_t *op)
+{
+	_enter("");
+
+	spin_lock(&kafsasyncd_async_lock);
+	/* unlink and leave the link self-pointing, so a later list_del() on it is harmless */
+	list_del_init(&op->link);
+
+	spin_unlock(&kafsasyncd_async_lock);
+
+	wake_up(&kafsasyncd_sleepq);
+
+	_leave("");
+} /* end afs_kafsasyncd_terminate_op() */
diff --git a/fs/afs/kafsasyncd.h b/fs/afs/kafsasyncd.h
new file mode 100644
index 000000000000..6438c17833a1
--- /dev/null
+++ b/fs/afs/kafsasyncd.h
@@ -0,0 +1,49 @@
+/* kafsasyncd.h: AFS asynchronous operation daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_KAFSASYNCD_H
+#define _LINUX_AFS_KAFSASYNCD_H
+
+#include "types.h"
+
+struct afs_async_op_ops {	/* per-operation callbacks invoked by kafsasyncd */
+	void (*attend)(afs_async_op_t *op);	/* called for an op the daemon has taken off the attention queue */
+	void (*discard)(afs_async_op_t *op);	/* called for each op still queued when the daemon exits, after its call was aborted */
+};
+
+/*****************************************************************************/
+/*
+ * asynchronous operation record
+ */
+struct afs_async_op
+{
+	struct list_head		link;		/* link in kafsasyncd's busy or attention queue */
+	afs_server_t			*server;	/* server being contacted */
+	struct rxrpc_call		*call;		/* RxRPC call performing op */
+	wait_queue_t			waiter;		/* wait queue entry for kafsasyncd, set up by afs_kafsasyncd_begin_op() */
+	const struct afs_async_op_ops	*ops;		/* operations */
+};
+
+static inline void afs_async_op_init(afs_async_op_t *op, const struct afs_async_op_ops *ops)
+{
+	op->ops = ops;			/* attend/discard handlers */
+	op->call = NULL;		/* no RxRPC call attached yet */
+	INIT_LIST_HEAD(&op->link);	/* not on any queue; 'server' and 'waiter' are not touched here */
+}
+
+extern int afs_kafsasyncd_start(void);	/* returns kernel_thread()'s result: negative on failure */
+extern void afs_kafsasyncd_stop(void);	/* returns once the daemon has exited */
+
+extern void afs_kafsasyncd_begin_op(afs_async_op_t *op);	/* put op on the busy queue */
+extern void afs_kafsasyncd_attend_op(afs_async_op_t *op);	/* move op to the attention queue and wake the daemon */
+extern void afs_kafsasyncd_terminate_op(afs_async_op_t *op);	/* take op off whichever queue it is on */
+
+#endif /* _LINUX_AFS_KAFSASYNCD_H */
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
new file mode 100644
index 000000000000..ccc1b4e6b94d
--- /dev/null
+++ b/fs/afs/kafstimod.c
@@ -0,0 +1,211 @@
+/* kafstimod.c: AFS timeout daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include "cell.h"
+#include "volume.h"
+#include "kafstimod.h"
+#include <asm/errno.h>
+#include "internal.h"
+
+static DECLARE_COMPLETION(kafstimod_alive);	/* completed by the daemon once it is running */
+static DECLARE_COMPLETION(kafstimod_dead);	/* completed by the daemon as it exits */
+static DECLARE_WAIT_QUEUE_HEAD(kafstimod_sleepq);	/* where the daemon sleeps between timeouts */
+static int kafstimod_die;	/* set by afs_kafstimod_stop() to ask the daemon to exit */
+
+static LIST_HEAD(kafstimod_list);	/* pending timers, kept in expiry order by afs_kafstimod_add_timer() */
+static spinlock_t kafstimod_lock = SPIN_LOCK_UNLOCKED;	/* guards kafstimod_list */
+
+static int kafstimod(void *arg);
+
+/*****************************************************************************/
+/*
+ * start the timeout daemon
+ */
+int afs_kafstimod_start(void)
+{
+	int rc = kernel_thread(kafstimod,NULL,0);
+
+	/* a negative result is an error and there is nothing to wait for */
+	if (rc >= 0) {
+		/* block until the new thread has completed kafstimod_alive */
+		wait_for_completion(&kafstimod_alive);
+	}
+
+	return rc;
+} /* end afs_kafstimod_start() */
+
+/*****************************************************************************/
+/*
+ * stop the timeout daemon
+ */
+void afs_kafstimod_stop(void)
+{
+	/* get rid of my daemon */
+	kafstimod_die = 1;			/* checked at the top of the daemon's loop */
+	wake_up(&kafstimod_sleepq);		/* wake it so that it notices the flag */
+	wait_for_completion(&kafstimod_dead);	/* completed by the daemon's complete_and_exit() */
+
+} /* end afs_kafstimod_stop() */
+
+/*****************************************************************************/
+/*
+ * timeout processing daemon
+ */
+static int kafstimod(void *arg)
+{
+	DECLARE_WAITQUEUE(myself,current);
+	/* thread body: sleeps until the first queued timer is due, runs its handler, and repeats until told to die */
+	afs_timer_t *timer;
+
+	printk("kAFS: Started kafstimod %d\n",current->pid);
+	strcpy(current->comm,"kafstimod");
+
+	daemonize();
+	/* let afs_kafstimod_start() return */
+	complete(&kafstimod_alive);
+
+	/* block every signal: the mask is set to the inverse of the empty set */
+	spin_lock_irq(&current->sig->siglock);
+	siginitsetinv(&current->blocked,0);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,3)
+	recalc_sigpending();
+#else
+	recalc_sigpending(current);
+#endif
+	spin_unlock_irq(&current->sig->siglock);
+
+	/* loop around looking for things to attend to */
+ loop:
+	set_current_state(TASK_INTERRUPTIBLE);
+	add_wait_queue(&kafstimod_sleepq,&myself);
+
+	for (;;) {
+		unsigned long jif;
+		signed long timeout;
+
+		/* deal with the server being asked to die */
+		if (kafstimod_die) {
+			remove_wait_queue(&kafstimod_sleepq,&myself);
+			_leave("");
+			complete_and_exit(&kafstimod_dead,0);
+		}
+
+		/* discard pending signals */
+		afs_discard_my_signals();
+
+		/* work out the time to elapse before the next event */
+		spin_lock(&kafstimod_lock);
+		if (list_empty(&kafstimod_list)) {
+			timeout = MAX_SCHEDULE_TIMEOUT;
+		}
+		else {
+			timer = list_entry(kafstimod_list.next,afs_timer_t,link);
+			timeout = timer->timo_jif;
+			jif = jiffies;
+
+			if (time_before_eq(timeout,jif))
+				goto immediate;
+			/* use the same jiffies snapshot as the test above so the sleep length is always positive */
+			else {
+				timeout = (long)timeout - (long)jif;
+			}
+		}
+		spin_unlock(&kafstimod_lock);
+
+		schedule_timeout(timeout);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	/* the thing on the front of the queue needs processing
+	 * - we come here with the lock held and timer pointing to the expired entry
+	 */
+ immediate:
+	remove_wait_queue(&kafstimod_sleepq,&myself);
+	set_current_state(TASK_RUNNING);
+
+	_debug("@@@ Begin Timeout of %p",timer);
+
+	/* dequeue the timer */
+	list_del_init(&timer->link);
+	spin_unlock(&kafstimod_lock);
+
+	/* call the timeout function (the lock has been dropped by now) */
+	timer->ops->timed_out(timer);
+
+	_debug("@@@ End Timeout");
+	goto loop;
+
+} /* end kafstimod() */
+
+/*****************************************************************************/
+/*
+ * (re-)queue a timer
+ */
+void afs_kafstimod_add_timer(afs_timer_t *timer, unsigned long timeout)
+{
+	struct list_head *_p;
+	afs_timer_t *ptimer;
+
+	_enter("%p,%lu",timer,timeout);
+
+	spin_lock(&kafstimod_lock);
+	/* unlink first so that an already-queued timer is moved, not duplicated */
+	list_del(&timer->link);
+
+	/* timeout is relative, in jiffies; store the absolute expiry time */
+	timer->timo_jif = jiffies + timeout;
+	/* find the first queued timer that expires later than this one */
+	list_for_each(_p,&kafstimod_list) {
+		ptimer = list_entry(_p,afs_timer_t,link);
+		if (time_before(timer->timo_jif,ptimer->timo_jif))
+			break;
+	}
+	/* if the loop ran to completion, _p is the list head and this appends at the tail */
+	list_add_tail(&timer->link,_p); /* insert before stopping point */
+
+	spin_unlock(&kafstimod_lock);
+	/* wake the daemon so that it recomputes how long to sleep */
+	wake_up(&kafstimod_sleepq);
+
+	_leave("");
+} /* end afs_kafstimod_add_timer() */
+
+/*****************************************************************************/
+/*
+ * dequeue a timer
+ * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
+ */
+int afs_kafstimod_del_timer(afs_timer_t *timer)
+{
+	int ret;
+
+	_enter("%p",timer);
+	spin_lock(&kafstimod_lock);
+	if (!list_empty(&timer->link)) {
+		list_del_init(&timer->link);
+		ret = 0;
+	} else {
+		/* a self-pointing link means the timer was not on the queue */
+		ret = -ENOENT;
+	}
+	spin_unlock(&kafstimod_lock);
+
+	/* wake the daemon so that it recomputes how long to sleep */
+	wake_up(&kafstimod_sleepq);
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_kafstimod_del_timer() */
diff --git a/fs/afs/kafstimod.h b/fs/afs/kafstimod.h
new file mode 100644
index 000000000000..342d81d6025b
--- /dev/null
+++ b/fs/afs/kafstimod.h
@@ -0,0 +1,45 @@
+/* kafstimod.h: AFS timeout daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_KAFSTIMOD_H
+#define _LINUX_AFS_KAFSTIMOD_H
+
+#include "types.h"
+
+struct afs_timer_ops {
+	/* called by kafstimod, without its list lock held, after the expired timer has been unlinked */
+	void (*timed_out)(struct afs_timer *timer);
+};
+
+/*****************************************************************************/
+/*
+ * AFS timer/timeout record
+ */
+struct afs_timer
+{
+	struct list_head		link;		/* link in timer queue (self-pointing when not queued) */
+	unsigned long			timo_jif;	/* absolute expiry time in jiffies, set by afs_kafstimod_add_timer() */
+	const struct afs_timer_ops	*ops;		/* timeout expiry function */
+};
+
+static inline void afs_timer_init(afs_timer_t *timer, const struct afs_timer_ops *ops)
+{	/* timo_jif is not set here: afs_kafstimod_add_timer() fills it in when the timer is queued */
+	timer->ops = ops;
+	INIT_LIST_HEAD(&timer->link);	/* self-pointing link, i.e. not queued */
+}
+
+extern int afs_kafstimod_start(void);	/* returns kernel_thread()'s result: negative on failure */
+extern void afs_kafstimod_stop(void);	/* returns once the daemon has exited */
+
+extern void afs_kafstimod_add_timer(afs_timer_t *timer, unsigned long timeout);	/* (re-)queue to expire 'timeout' jiffies from now */
+extern int afs_kafstimod_del_timer(afs_timer_t *timer);	/* 0 if it was dequeued, -ENOENT if it was not queued */
+
+#endif /* _LINUX_AFS_KAFSTIMOD_H */
diff --git a/fs/afs/main.c b/fs/afs/main.c
new file mode 100644
index 000000000000..dc20f670a021
--- /dev/null
+++ b/fs/afs/main.c
@@ -0,0 +1,193 @@
+/* main.c: AFS client file system
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/call.h>
+#include <rxrpc/peer.h>
+#include "cell.h"
+#include "server.h"
+#include "fsclient.h"
+#include "cmservice.h"
+#include "kafstimod.h"
+#include "kafsasyncd.h"
+#include "internal.h"
+
+struct rxrpc_transport *afs_transport;
+
+static int afs_init(void);
+static void afs_exit(void);
+static int afs_adding_peer(struct rxrpc_peer *peer);
+static void afs_discarding_peer(struct rxrpc_peer *peer);
+
+module_init(afs_init);
+module_exit(afs_exit);
+
+MODULE_DESCRIPTION("AFS Client File System");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+static struct rxrpc_peer_ops afs_peer_ops = {
+	.adding		= afs_adding_peer,
+	.discarding	= afs_discarding_peer,
+};
+
+struct list_head afs_cb_hash_tbl[AFS_CB_HASH_COUNT];
+spinlock_t afs_cb_hash_lock = SPIN_LOCK_UNLOCKED;
+
+/*****************************************************************************/
+/*
+ * initialise the AFS client FS module
+ */
+static int afs_init(void)
+{
+	int i, ret;
+
+	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
+
+	/* every bucket of the callback hash table starts out as an empty list */
+	spin_lock_init(&afs_cb_hash_lock);
+	for (i=0; i<AFS_CB_HASH_COUNT; i++)
+		INIT_LIST_HEAD(&afs_cb_hash_tbl[i]);
+
+	/* /proc entries come first: nothing else exists yet, so just bail out */
+	ret = afs_proc_init();
+	if (ret<0)
+		return ret;
+
+	/* the cell database */
+	ret = afs_cell_init();
+	if (ret<0)
+		goto error;
+
+	/* the timeout daemon */
+	ret = afs_kafstimod_start();
+	if (ret<0)
+		goto error;
+
+	/* the async operation daemon */
+	ret = afs_kafsasyncd_start();
+	if (ret<0)
+		goto error_kafstimod;
+
+	/* the RxRPC transport, with our peer add/discard hooks attached to it */
+	ret = rxrpc_create_transport(7001,&afs_transport);
+	if (ret<0)
+		goto error_kafsasyncd;
+	afs_transport->peer_ops = &afs_peer_ops;
+
+	/* and lastly the filesystems themselves */
+	ret = afs_fs_init();
+	if (ret<0)
+		goto error_transport;
+
+	return ret;
+
+	/* failure: fall through the labels, undoing the steps in reverse order */
+ error_transport:
+	rxrpc_put_transport(afs_transport);
+ error_kafsasyncd:
+	afs_kafsasyncd_stop();
+ error_kafstimod:
+	afs_kafstimod_stop();
+ error:
+	afs_cell_purge();
+	afs_proc_cleanup();
+	printk(KERN_ERR "kAFS: failed to register: %d\n",ret);
+	return ret;
+} /* end afs_init() */
+
+/*****************************************************************************/
+/*
+ * clean up on module removal
+ */
+static void afs_exit(void)
+{
+	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
+	/* NOTE(review): afs_init()'s error unwind stops kafsasyncd before
+	 * kafstimod; the two are stopped the other way round here - confirm */
+	afs_fs_exit();
+	rxrpc_put_transport(afs_transport);
+	afs_kafstimod_stop();
+	afs_kafsasyncd_stop();
+	afs_cell_purge();
+	afs_proc_cleanup();
+} /* end afs_exit() */
+
+/*****************************************************************************/
+/*
+ * notification that new peer record is being added
+ * - called from krxsecd
+ * - return an error to induce an abort
+ * - mustn't sleep (caller holds an rwlock)
+ */
+static int afs_adding_peer(struct rxrpc_peer *peer)
+{
+	afs_server_t *srv;
+	int err;
+
+	_debug("kAFS: Adding new peer %08x\n",ntohl(peer->addr.s_addr));
+
+	/* map the peer onto one of our server records; failure rejects the peer */
+	err = afs_server_find_by_peer(peer,&srv);
+	if (err<0)
+		return err;
+
+	_debug("Server %p{u=%d}\n",srv,atomic_read(&srv->usage));
+
+	_debug("Cell %p{u=%d}\n",srv->cell,atomic_read(&srv->cell->usage));
+
+	/* point peer and server at each other under the shared global lock */
+	spin_lock(&afs_server_peer_lock);
+	srv->peer = peer;
+	peer->user = srv;
+	spin_unlock(&afs_server_peer_lock);
+
+	afs_put_server(srv);	/* presumably the ref obtained by the lookup above */
+
+	return 0;
+} /* end afs_adding_peer() */
+
+/*****************************************************************************/
+/*
+ * notification that a peer record is being discarded
+ * - called from krxiod or krxsecd
+ */
+static void afs_discarding_peer(struct rxrpc_peer *peer)
+{
+	afs_server_t *srv;
+
+	_enter("%p",peer);
+
+	_debug("Discarding peer %08x (rtt=%lu.%lumS)\n",
+	       ntohl(peer->addr.s_addr),
+	       peer->rtt/1000,
+	       peer->rtt%1000);
+
+	/* break the peer<->server cross-links under the same global lock
+	 * that afs_adding_peer() takes to make them
+	 * - peer->user may be NULL, in which case there is nothing to unlink
+	 */
+	spin_lock(&afs_server_peer_lock);
+	srv = peer->user;
+	if (srv != NULL) {
+		srv->peer = NULL;
+		peer->user = NULL;
+	}
+	spin_unlock(&afs_server_peer_lock);
+
+	_leave("");
+
+} /* end afs_discarding_peer() */
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
new file mode 100644
index 000000000000..e4fce66d76e0
--- /dev/null
+++ b/fs/afs/misc.c
@@ -0,0 +1,39 @@
+/* misc.c: miscellaneous bits
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include "errors.h"
+#include "internal.h"
+
+/*****************************************************************************/
+/*
+ * convert an AFS abort code to a Linux error number
+ */
+int afs_abort_to_error(int abortcode)
+{
+	/* codes that share an errno are grouped; anything unrecognised is -EIO */
+	switch (abortcode) {
+	case VNOVNODE:
+	case VOFFLINE:		return -ENOENT;
+	case VNOVOL:
+	case VMOVED:		return -ENXIO;
+	case VVOLEXISTS:
+	case VONLINE:		return -EEXIST;
+	case VDISKFULL:		return -ENOSPC;
+	case VOVERQUOTA:	return -EDQUOT;
+	case VBUSY:		return -EBUSY;
+	case VSALVAGE:
+	case VNOSERVICE:
+	default:		return -EIO;
+	}
+} /* end afs_abort_to_error() */
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
new file mode 100644
index 000000000000..4e88180f1c09
--- /dev/null
+++ b/fs/afs/mntpt.c
@@ -0,0 +1,112 @@
+/* mntpt.c: mountpoint management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "vnode.h"
+#include "internal.h"
+
+
+static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry);
+static int afs_mntpt_open(struct inode *inode, struct file *file);
+
+struct file_operations afs_mntpt_file_operations = {
+	.open		= afs_mntpt_open,
+};
+
+struct inode_operations afs_mntpt_inode_operations = {
+	.lookup		= afs_mntpt_lookup,
+	.readlink	= page_readlink,
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	.getattr	= afs_inode_getattr,
+#else
+	.revalidate	= afs_inode_revalidate,
+#endif
+};
+
+/*****************************************************************************/
+/*
+ * check a symbolic link to see whether it actually encodes a mountpoint
+ * - sets AFS_VNODE_MOUNTPOINT on the vnode if so (the flag is never cleared here)
+ * - returns 0 if the link could be examined, or a negative error if it could not be read
+ */
+int afs_mntpt_check_symlink(afs_vnode_t *vnode)
+{
+	struct page *page;
+	size_t size;
+	char *buf;
+	int ret;
+
+	_enter("{%u,%u}",vnode->fid.vnode,vnode->fid.unique);
+
+	/* read the first page of the symlink's contents into the pagecache */
+	page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping,0,
+			       (filler_t*)AFS_VNODE_TO_I(vnode)->i_mapping->a_ops->readpage,NULL);
+	if (IS_ERR(page)) {
+		ret = PTR_ERR(page);
+		goto out;
+	}
+
+	/* the page is mapped before it is checked so that out_free can always kunmap */
+	ret = -EIO;
+	wait_on_page_locked(page);
+	buf = kmap(page);
+	if (!PageUptodate(page) || PageError(page))
+		goto out_free;
+
+	/* examine the symlink's contents; the '*' arguments of %*.*s must be int */
+	size = vnode->status.size;
+	_debug("symlink to %*.*s",(int)size,(int)size,buf);
+
+	/* only page 0 is mapped, so buf must never be indexed beyond PAGE_SIZE */
+	if (size>2 && size<=PAGE_SIZE &&
+	    (buf[0]=='%' || buf[0]=='#') &&
+	    buf[size-1]=='.'
+	    ) {
+		_debug("symlink is a mountpoint");
+		spin_lock(&vnode->lock);
+		vnode->flags |= AFS_VNODE_MOUNTPOINT;
+		spin_unlock(&vnode->lock);
+	}
+
+	ret = 0;
+
+ out_free:
+	kunmap(page);
+	page_cache_release(page);
+ out:
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_mntpt_check_symlink() */
+
+/*****************************************************************************/
+/*
+ * lookup is not supported on a mountpoint inode: every attempt fails with -EREMOTE
+ */
+static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry)
+{
+	return ERR_PTR(-EREMOTE);
+} /* end afs_mntpt_lookup() */
+
+/*****************************************************************************/
+/*
+ * open is not supported on a mountpoint inode: every attempt fails with -EREMOTE
+ */
+static int afs_mntpt_open(struct inode *inode, struct file *file)
+{
+	return -EREMOTE;
+} /* end afs_mntpt_open() */
diff --git a/fs/afs/mount.h b/fs/afs/mount.h
new file mode 100644
index 000000000000..fbdd77878546
--- /dev/null
+++ b/fs/afs/mount.h
@@ -0,0 +1,23 @@
+/* mount.h: mount parameters
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_MOUNT_H
+#define _LINUX_AFS_MOUNT_H
+
+struct afs_mountdata {
+	const char		*volume;	/* name of volume */
+	const char		*cell;		/* name of cell containing volume */
+	const char		*cache;		/* name of cache block device */
+	size_t			nservers;	/* number of server addresses listed */
+	u_int32_t		servers[10];	/* IP addresses of servers in this cell */
+};
+
+#endif /* _LINUX_AFS_MOUNT_H */
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
new file mode 100644
index 000000000000..83fda6decf57
--- /dev/null
+++ b/fs/afs/proc.c
@@ -0,0 +1,739 @@
+/* proc.c: /proc interface for AFS
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include "cell.h"
+#include "volume.h"
+#include <asm/uaccess.h>
+#include "internal.h"
+
+static struct proc_dir_entry *proc_afs;
+
+
+static int afs_proc_cells_open(struct inode *inode, struct file *file);
+static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos);
+static void afs_proc_cells_stop(struct seq_file *p, void *v);
+static int afs_proc_cells_show(struct seq_file *m, void *v);
+static ssize_t afs_proc_cells_write(struct file *file, const char *buf, size_t size, loff_t *_pos);
+
+static struct seq_operations afs_proc_cells_ops = {
+	.start	= afs_proc_cells_start,
+	.next	= afs_proc_cells_next,
+	.stop	= afs_proc_cells_stop,
+	.show	= afs_proc_cells_show,
+};
+
+static struct file_operations afs_proc_cells_fops = {
+	.open		= afs_proc_cells_open,
+	.read		= seq_read,
+	.write		= afs_proc_cells_write,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
+static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file);
+static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, loff_t *pos);
+static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v);
+static int afs_proc_cell_volumes_show(struct seq_file *m, void *v);
+
+static struct seq_operations afs_proc_cell_volumes_ops = {
+	.start	= afs_proc_cell_volumes_start,
+	.next	= afs_proc_cell_volumes_next,
+	.stop	= afs_proc_cell_volumes_stop,
+	.show	= afs_proc_cell_volumes_show,
+};
+
+static struct file_operations afs_proc_cell_volumes_fops = {
+	.open		= afs_proc_cell_volumes_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= afs_proc_cell_volumes_release,
+};
+
+static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file);
+static int afs_proc_cell_vlservers_release(struct inode *inode, struct file *file);
+static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, loff_t *pos);
+static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v);
+static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v);
+
+static struct seq_operations afs_proc_cell_vlservers_ops = {
+	.start	= afs_proc_cell_vlservers_start,
+	.next	= afs_proc_cell_vlservers_next,
+	.stop	= afs_proc_cell_vlservers_stop,
+	.show	= afs_proc_cell_vlservers_show,
+};
+
+static struct file_operations afs_proc_cell_vlservers_fops = {
+	.open		= afs_proc_cell_vlservers_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= afs_proc_cell_vlservers_release,
+};
+
+static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
+static int afs_proc_cell_servers_release(struct inode *inode, struct file *file);
+static void *afs_proc_cell_servers_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, loff_t *pos);
+static void afs_proc_cell_servers_stop(struct seq_file *p, void *v);
+static int afs_proc_cell_servers_show(struct seq_file *m, void *v);
+
+static struct seq_operations afs_proc_cell_servers_ops = {
+	.start	= afs_proc_cell_servers_start,
+	.next	= afs_proc_cell_servers_next,
+	.stop	= afs_proc_cell_servers_stop,
+	.show	= afs_proc_cell_servers_show,
+};
+
+static struct file_operations afs_proc_cell_servers_fops = {
+	.open		= afs_proc_cell_servers_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= afs_proc_cell_servers_release,
+};
+
+/*****************************************************************************/
+/*
+ * initialise the /proc/fs/afs/ directory
+ */
+int afs_proc_init(void)
+{
+	struct proc_dir_entry *cells;
+
+	_enter("");
+
+	/* the /proc/fs/afs directory itself */
+	proc_afs = proc_mkdir("fs/afs",NULL);
+	if (!proc_afs)
+		goto error;
+	proc_afs->owner = THIS_MODULE;
+
+	/* the "cells" file within it, driven by afs_proc_cells_fops */
+	cells = create_proc_entry("cells",0,proc_afs);
+	if (!cells)
+		goto error_proc;
+	cells->proc_fops = &afs_proc_cells_fops;
+	cells->owner = THIS_MODULE;
+
+	_leave(" = 0");
+	return 0;
+
+	/* whatever failed is reported as -ENOMEM; the directory is removed
+	 * again if it was the "cells" file that could not be created */
+ error_proc:
+	remove_proc_entry("fs/afs",NULL);
+ error:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+
+} /* end afs_proc_init() */
+
+/*****************************************************************************/
+/*
+ * clean up the /proc/fs/afs/ directory
+ */
+void afs_proc_cleanup(void)
+{
+	/* remove the "cells" file first, then the directory that contains it */
+	remove_proc_entry("cells",proc_afs);
+	remove_proc_entry("fs/afs",NULL);
+
+} /* end afs_proc_cleanup() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/fs/afs/cells" which provides a summary of extant cells
+ */
+static int afs_proc_cells_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret;
+
+	ret = seq_open(file,&afs_proc_cells_ops);
+	if (ret<0)
+		return ret;
+
+	/* pass the proc entry's data pointer to the iterator via the seq_file */
+	seq = file->private_data;
+	seq->private = PDE(inode)->data;
+	return 0;
+} /* end afs_proc_cells_open() */
+
+/*****************************************************************************/
+/*
+ * set up the iterator to start reading from the cells list and return the first item
+ */
+static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *cur;
+	loff_t skip = *_pos;
+
+	/* hold the list still until afs_proc_cells_stop() lets go of it */
+	down_read(&afs_proc_cells_sem);
+
+	/* position 0 is the header line, represented by the token (void *)1 */
+	if (skip==0)
+		return (void *)1;
+
+	/* position n>0 is the n'th cell: walk past the n-1 cells before it */
+	skip--;
+	list_for_each(cur,&afs_proc_cells)
+		if (!skip--)
+			break;
+
+	return cur==&afs_proc_cells ? NULL : cur;
+} /* end afs_proc_cells_start() */
+
+/*****************************************************************************/
+/*
+ * move to next cell in cells list
+ */
+static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *nxt;
+
+	(*pos)++;
+
+	/* the header token is followed by the first cell, a cell by its successor */
+	nxt = (v==(void*)1) ? afs_proc_cells.next : ((struct list_head *)v)->next;
+
+	return nxt==&afs_proc_cells ? NULL : nxt;
+} /* end afs_proc_cells_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading from the cells list
+ */
+static void afs_proc_cells_stop(struct seq_file *p, void *v)
+{
+	up_read(&afs_proc_cells_sem);	/* taken in afs_proc_cells_start() */
+
+} /* end afs_proc_cells_stop() */
+
+/*****************************************************************************/
+/*
+ * display a header line followed by a load of cell lines
+ */
+static int afs_proc_cells_show(struct seq_file *m, void *v)
+{
+	afs_cell_t *cell;
+
+	/* the header token produces the column titles */
+	if (v == (void *)1) {
+		seq_puts(m, "USE NAME\n");
+		return 0;
+	}
+
+	/* anything else is the proc_link member of a cell record */
+	cell = list_entry(v,afs_cell_t,proc_link);
+	seq_printf(m,"%3d %s\n",atomic_read(&cell->usage),cell->name);
+	return 0;
+} /* end afs_proc_cells_show() */
+
+/*****************************************************************************/
+/*
+ * handle writes to /proc/fs/afs/cells
+ * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]*
+ */
+static ssize_t afs_proc_cells_write(struct file *file, const char *buf, size_t size, loff_t *_pos)
+{
+	afs_cell_t *cell;
+	char *kbuf, *nl, *name, *args;
+	int ret;
+
+	/* a command needs at least two bytes and must be shorter than a page */
+	if (size<=1 || size>=PAGE_SIZE)
+		return -EINVAL;
+
+	kbuf = kmalloc(size+1,GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(kbuf,buf,size)!=0)
+		goto done;
+	kbuf[size] = 0;
+
+	/* only the first line is considered */
+	nl = memchr(kbuf,'\n',size);
+	if (nl)
+		*nl = 0;
+
+	/* split "<cmd> <name> <args>" in place; runs of spaces become NULs */
+	name = strchr(kbuf,' ');
+	if (!name)
+		goto inval;
+	do { *name++ = 0; } while(*name==' ');
+	if (!*name)
+		goto inval;
+
+	args = strchr(name,' ');
+	if (!args)
+		goto inval;
+	do { *args++ = 0; } while(*args==' ');
+	if (!*args)
+		goto inval;
+
+	_debug("cmd=%s name=%s args=%s",kbuf,name,args);
+	/* "add" is the only command understood */
+	if (strcmp(kbuf,"add")!=0)
+		goto inval;
+
+	ret = afs_cell_create(name,args,&cell);
+	if (ret<0)
+		goto done;
+	printk("kAFS: Added new cell '%s'\n",name);
+	ret = size;
+
+ done:
+	kfree(kbuf);
+	_leave(" = %d",ret);
+	return ret;
+
+ inval:
+	ret = -EINVAL;
+	printk("kAFS: Invalid Command on /proc/fs/afs/cells file\n");
+	goto done;
+} /* end afs_proc_cells_write() */
+
+/*****************************************************************************/
+/*
+ * initialise /proc/fs/afs/<cell>/
+ */
+int afs_proc_cell_setup(afs_cell_t *cell)
+{
+	struct proc_dir_entry *ent;
+
+	_enter("%p{%s}",cell,cell->name);
+	/* a directory named after the cell; each file in it carries the cell in ->data */
+	cell->proc_dir = proc_mkdir(cell->name,proc_afs);
+	if (!cell->proc_dir)
+		return -ENOMEM;
+
+	ent = create_proc_entry("servers",0,cell->proc_dir);
+	if (!ent)
+		goto error_proc;
+	ent->proc_fops = &afs_proc_cell_servers_fops;
+	ent->owner = THIS_MODULE;
+	ent->data = cell;
+
+	ent = create_proc_entry("vlservers",0,cell->proc_dir);
+	if (!ent)
+		goto error_servers;
+	ent->proc_fops = &afs_proc_cell_vlservers_fops;
+	ent->owner = THIS_MODULE;
+	ent->data = cell;
+
+	ent = create_proc_entry("volumes",0,cell->proc_dir);
+	if (!ent)
+		goto error_vlservers;
+	ent->proc_fops = &afs_proc_cell_volumes_fops;
+	ent->owner = THIS_MODULE;
+	ent->data = cell;
+
+	_leave(" = 0");
+	return 0;
+
+ error_vlservers:
+	remove_proc_entry("vlservers",cell->proc_dir);
+ error_servers:
+	remove_proc_entry("servers",cell->proc_dir);
+ error_proc:
+	remove_proc_entry(cell->name,proc_afs);
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+} /* end afs_proc_cell_setup() */
+
+/*****************************************************************************/
+/*
+ * remove /proc/fs/afs/<cell>/
+ */
+void afs_proc_cell_remove(afs_cell_t *cell)
+{
+	_enter("");
+
+	/* mirror of afs_proc_cell_setup(): the three files first, then the directory */
+	remove_proc_entry("volumes",cell->proc_dir);
+	remove_proc_entry("vlservers",cell->proc_dir);
+	remove_proc_entry("servers",cell->proc_dir);
+	remove_proc_entry(cell->name,proc_afs);
+	_leave("");
+} /* end afs_proc_cell_remove() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/fs/afs/<cell>/volumes" which lists the cell's volume location records
+ */
+static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	afs_cell_t *cell;
+	int ret;
+
+	/* hold a ref on the cell while the file is open; the release op drops it */
+	cell = afs_get_cell_maybe((afs_cell_t**)&PDE(inode)->data);
+	if (!cell)
+		return -ENOENT;
+	ret = seq_open(file,&afs_proc_cell_volumes_ops);
+	if (ret<0) {
+		afs_put_cell(cell);	/* don't leak the ref if the open fails */
+		return ret;
+	}
+	m = file->private_data;
+	m->private = cell;	/* context for the seq iterator functions */
+	return 0;
+} /* end afs_proc_cell_volumes_open() */
+
+/*****************************************************************************/
+/*
+ * close the file and release the ref to the cell
+ */
+static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file)
+{
+	afs_cell_t *cell = PDE(inode)->data;
+	int ret;
+
+	ret = seq_release(inode,file);
+
+	afs_put_cell(cell);	/* pairs with the ref taken by the open op */
+	return ret;		/* was missing: a non-void function must return a value */
+} /* end afs_proc_cell_volumes_release() */
+
+/*****************************************************************************/
+/*
+ * start iterating over the cell's volume location list at position *_pos
+ */
+static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
+{
+	afs_cell_t *cell = m->private;
+	struct list_head *cur;
+	loff_t skip = *_pos;
+
+	_enter("cell=%p pos=%Ld",cell,*_pos);
+
+	/* hold the list still until afs_proc_cell_volumes_stop() lets go of it */
+	down_read(&cell->vl_sem);
+
+	/* position 0 is the header line, represented by the token (void *)1 */
+	if (skip==0)
+		return (void *)1;
+
+	/* position n>0 is the n'th record: walk past the n-1 before it */
+	skip--;
+	list_for_each(cur,&cell->vl_list)
+		if (!skip--)
+			break;
+
+	return cur==&cell->vl_list ? NULL : cur;
+} /* end afs_proc_cell_volumes_start() */
+
+/*****************************************************************************/
+/*
+ * move to the next record in the cell's volume location list
+ */
+static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, loff_t *_pos)
+{
+	afs_cell_t *cell = p->private;
+	struct list_head *nxt = v;
+
+	_enter("cell=%p pos=%Ld",cell,*_pos);
+	(*_pos)++;
+	/* the header token is followed by the first record, a record by its successor */
+	if (v==(void*)1)
+		nxt = cell->vl_list.next;
+	else
+		nxt = nxt->next;
+	return nxt==&cell->vl_list ? NULL : nxt;
+} /* end afs_proc_cell_volumes_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading from the cell's volume location list
+ */
+static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
+{
+	afs_cell_t *cell = p->private;
+
+	up_read(&cell->vl_sem);	/* taken in afs_proc_cell_volumes_start() */
+
+} /* end afs_proc_cell_volumes_stop() */
+
+/*****************************************************************************/
+/*
+ * display a header line followed by a load of volume lines
+ */
+static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
+{
+	afs_vlocation_t *vl;
+
+	/* the header token produces the column titles */
+	if (v == (void *)1) {
+		seq_puts(m, "USE VLID[0]  VLID[1]  VLID[2]  NAME\n");
+		return 0;
+	}
+
+	/* anything else is the link member of a volume location record */
+	vl = list_entry(v,afs_vlocation_t,link);
+	seq_printf(m,"%3d %08x %08x %08x %s\n",
+		   atomic_read(&vl->usage),
+		   vl->vldb.vid[0],
+		   vl->vldb.vid[1],
+		   vl->vldb.vid[2],
+		   vl->vldb.name);
+
+	return 0;
+} /* end afs_proc_cell_volumes_show() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume location server
+ */
+static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	afs_cell_t *cell;
+	int ret;
+
+	/* hold a ref on the cell while the file is open; the release op drops it */
+	cell = afs_get_cell_maybe((afs_cell_t**)&PDE(inode)->data);
+	if (!cell)
+		return -ENOENT;
+	ret = seq_open(file,&afs_proc_cell_vlservers_ops);
+	if (ret<0) {
+		afs_put_cell(cell);	/* don't leak the ref if the open fails */
+		return ret;
+	}
+	m = file->private_data;
+	m->private = cell;	/* context for the seq iterator functions */
+	return 0;
+} /* end afs_proc_cell_vlservers_open() */
+
+/*****************************************************************************/
+/*
+ * close the file and release the ref to the cell
+ */
+static int afs_proc_cell_vlservers_release(struct inode *inode, struct file *file)
+{
+	afs_cell_t *cell = PDE(inode)->data;
+	int ret;
+
+	ret = seq_release(inode,file);
+
+	afs_put_cell(cell);	/* pairs with the ref taken by the open op */
+	return ret;		/* was missing: a non-void function must return a value */
+} /* end afs_proc_cell_vlservers_release() */
+
+/*****************************************************************************/
+/*
+ * start iterating over the cell's volume location server addresses at position *_pos
+ */
+static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
+{
+	afs_cell_t *cell = m->private;
+	loff_t idx = *_pos;
+
+	_enter("cell=%p pos=%Ld",cell,*_pos);
+
+	/* held until afs_proc_cell_vlservers_stop() releases it */
+	down_read(&cell->vl_sem);
+
+	/* position 0 is the header line, represented by the token (void *)1 */
+	if (idx==0)
+		return (void *)1;
+
+	/* position n>0 is entry n-1 of the address array, if there is one */
+	idx--;
+	if (idx<cell->vl_naddrs)
+		return &cell->vl_addrs[idx];
+	return NULL;
+} /* end afs_proc_cell_vlservers_start() */
+
+/*****************************************************************************/
+/*
+ * move to the next address in the cell's volume location server array
+ */
+static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, loff_t *_pos)
+{
+	afs_cell_t *cell = p->private;
+	loff_t idx;
+
+	_enter("cell=%p{nad=%u} pos=%Ld",cell,cell->vl_naddrs,*_pos);
+
+	/* position n maps to array entry n-1, so the old position is the new index */
+	idx = (*_pos)++;
+	if (idx<cell->vl_naddrs)
+		return &cell->vl_addrs[idx];
+
+	return NULL;
+} /* end afs_proc_cell_vlservers_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading from the cell's volume location server array
+ */
+static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
+{
+	afs_cell_t *cell = p->private;
+
+	up_read(&cell->vl_sem);	/* taken in afs_proc_cell_vlservers_start() */
+
+} /* end afs_proc_cell_vlservers_stop() */
+
+/*****************************************************************************/
+/*
+ * display a header line followed by one volume location server address per line
+ */
+static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
+{
+	struct in_addr *addr;
+
+	/* the header token produces the column title */
+	if (v == (struct in_addr *)1) {
+		seq_puts(m,"ADDRESS\n");
+		return 0;
+	}
+
+	/* anything else points at an entry of the cell's vl_addrs[] array */
+	addr = v;
+	seq_printf(m,"%u.%u.%u.%u\n",NIPQUAD(addr->s_addr));
+	return 0;
+} /* end afs_proc_cell_vlservers_show() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/fs/afs/<cell>/servers" which provides a summary of active servers
+ */
+static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	afs_cell_t *cell;
+	int ret;
+
+	/* hold a ref on the cell while the file is open; the release op drops it */
+	cell = afs_get_cell_maybe((afs_cell_t**)&PDE(inode)->data);
+	if (!cell)
+		return -ENOENT;
+	ret = seq_open(file,&afs_proc_cell_servers_ops);
+	if (ret<0) {
+		afs_put_cell(cell);	/* don't leak the ref if the open fails */
+		return ret;
+	}
+	m = file->private_data;
+	m->private = cell;	/* context for the seq iterator functions */
+	return 0;
+} /* end afs_proc_cell_servers_open() */
+
+/*****************************************************************************/
+/*
+ * close the file and release the ref to the cell
+ */
+static int afs_proc_cell_servers_release(struct inode *inode, struct file *file)
+{
+	afs_cell_t *cell = PDE(inode)->data;
+	int ret;
+
+	ret = seq_release(inode,file);
+
+	afs_put_cell(cell);	/* pairs with the ref taken by the open op */
+	return ret;		/* was missing: a non-void function must return a value */
+} /* end afs_proc_cell_servers_release() */
+
+/*****************************************************************************/
+/*
+ * start iterating over the cell's server list at position *_pos
+ */
+static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
+{
+	afs_cell_t *cell = m->private;
+	struct list_head *cur;
+	loff_t skip = *_pos;
+
+	_enter("cell=%p pos=%Ld",cell,*_pos);
+
+	/* hold the list still until afs_proc_cell_servers_stop() lets go of it */
+	read_lock(&cell->sv_lock);
+
+	/* position 0 is the header line, represented by the token (void *)1 */
+	if (skip==0)
+		return (void *)1;
+
+	/* position n>0 is the n'th server: walk past the n-1 before it */
+	skip--;
+	list_for_each(cur,&cell->sv_list)
+		if (!skip--)
+			break;
+
+	return cur==&cell->sv_list ? NULL : cur;
+} /* end afs_proc_cell_servers_start() */
+
+/*****************************************************************************/
+/*
+ * move to the next record in the cell's server list
+ */
+static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, loff_t *_pos)
+{
+	afs_cell_t *cell = p->private;
+	struct list_head *nxt = v;
+
+	_enter("cell=%p pos=%Ld",cell,*_pos);
+	(*_pos)++;
+	/* the header token is followed by the first server, a server by its successor */
+	if (v==(void*)1)
+		nxt = cell->sv_list.next;
+	else
+		nxt = nxt->next;
+	return nxt==&cell->sv_list ? NULL : nxt;
+} /* end afs_proc_cell_servers_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading from the cell's server list
+ */
+static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
+{
+	afs_cell_t *cell = p->private;
+
+	read_unlock(&cell->sv_lock);	/* taken in afs_proc_cell_servers_start() */
+
+} /* end afs_proc_cell_servers_stop() */
+
+/*****************************************************************************/
+/*
+ * display a header line followed by one line per server record
+ */
+static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
+{
+	afs_server_t *srv;
+	char ipaddr[20];
+
+	/* the header token produces the column titles */
+	if (v == (void *)1) {
+		seq_puts(m, "USE ADDR            STATE\n");
+		return 0;
+	}
+
+	/* anything else is the link member of a server record; the address is
+	 * rendered separately so that it can be padded to a fixed-width column */
+	srv = list_entry(v,afs_server_t,link);
+	sprintf(ipaddr,"%u.%u.%u.%u",NIPQUAD(srv->addr));
+	seq_printf(m,"%3d %-15.15s %5d\n",
+		   atomic_read(&srv->usage),
+		   ipaddr,srv->fs_state);
+
+	return 0;
+} /* end afs_proc_cell_servers_show() */
diff --git a/fs/afs/server.c b/fs/afs/server.c
new file mode 100644
index 000000000000..b249d7cc3261
--- /dev/null
+++ b/fs/afs/server.c
@@ -0,0 +1,489 @@
+/* server.c: AFS server record management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/connection.h>
+#include "volume.h"
+#include "cell.h"
+#include "server.h"
+#include "transport.h"
+#include "vlclient.h"
+#include "kafstimod.h"
+#include "internal.h"
+
+spinlock_t afs_server_peer_lock = SPIN_LOCK_UNLOCKED;
+
+#define FS_SERVICE_ID		1	/* AFS File Server service ID */
+#define VL_SERVICE_ID		52	/* AFS Volume Location Service ID */
+
+static void __afs_server_timeout(afs_timer_t *timer)
+{
+	afs_server_t *server = list_entry(timer,afs_server_t,timeout);
+
+	_debug("SERVER TIMEOUT [%p{u=%d}]",server,atomic_read(&server->usage));
+
+	afs_server_do_timeout(server);
+}
+
+static const struct afs_timer_ops afs_server_timer_ops = {
+	.timed_out	= __afs_server_timeout,
+};
+
+/*****************************************************************************/
+/*
+ * lookup a server record in a cell
+ * - TODO: search the cell's server list
+ */
+int afs_server_lookup(afs_cell_t *cell, const struct in_addr *addr, afs_server_t **_server)
+{
+	struct list_head *_p;
+	afs_server_t *server, *active, *zombie;
+	int loop;
+
+	_enter("%p,%08x,",cell,ntohl(addr->s_addr));
+
+	/* allocate and initialise a server record */
+	server = kmalloc(sizeof(afs_server_t),GFP_KERNEL);
+	if (!server) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	memset(server,0,sizeof(afs_server_t));
+	atomic_set(&server->usage,1);
+
+	INIT_LIST_HEAD(&server->link);
+	init_rwsem(&server->sem);
+	INIT_LIST_HEAD(&server->fs_callq);
+	spin_lock_init(&server->fs_lock);
+	INIT_LIST_HEAD(&server->cb_promises);
+	spin_lock_init(&server->cb_lock);
+
+	for (loop=0; loop<AFS_SERVER_CONN_LIST_SIZE; loop++)
+		server->fs_conn_cnt[loop] = 4;
+
+	memcpy(&server->addr,addr,sizeof(struct in_addr));
+	server->addr.s_addr = addr->s_addr;
+
+	afs_timer_init(&server->timeout,&afs_server_timer_ops);
+
+	/* add to the cell */
+	write_lock(&cell->sv_lock);
+
+	/* check the active list */
+	list_for_each(_p,&cell->sv_list) {
+		active = list_entry(_p,afs_server_t,link);
+
+		if (active->addr.s_addr==addr->s_addr)
+			goto use_active_server;
+	}
+
+	/* check the inactive list */
+	spin_lock(&cell->sv_gylock);
+	list_for_each(_p,&cell->sv_graveyard) {
+		zombie = list_entry(_p,afs_server_t,link);
+
+		if (zombie->addr.s_addr==addr->s_addr)
+			goto resurrect_server;
+	}
+	spin_unlock(&cell->sv_gylock);
+
+	afs_get_cell(cell);
+	server->cell = cell;
+	list_add_tail(&server->link,&cell->sv_list);
+
+	write_unlock(&cell->sv_lock);
+
+	*_server = server;
+	_leave(" = 0 (%p)",server);
+	return 0;
+
+	/* found a matching active server */
+ use_active_server:
+	_debug("active server");
+	afs_get_server(active);
+	write_unlock(&cell->sv_lock);
+
+	kfree(server);
+
+	*_server = active;
+	_leave(" = 0 (%p)",active);
+	return 0;
+
+	/* found a matching server in the graveyard, so resurrect it and dispose of the new rec */
+ resurrect_server:
+	_debug("resurrecting server");
+
+	list_del(&zombie->link);
+	list_add_tail(&zombie->link,&cell->sv_list);
+	afs_get_server(zombie);
+	afs_kafstimod_del_timer(&zombie->timeout);
+	spin_unlock(&cell->sv_gylock);
+	write_unlock(&cell->sv_lock);
+
+	kfree(server);
+
+	*_server = zombie;
+	_leave(" = 0 (%p)",zombie);
+	return 0;
+
+} /* end afs_server_lookup() */
+
+/*****************************************************************************/
+/*
+ * destroy a server record
+ * - removes from the cell list
+ */
+void afs_put_server(afs_server_t *server)
+{
+	afs_cell_t *cell;
+
+	_enter("%p",server);
+
+	cell = server->cell;
+
+	/* sanity check */
+	if (atomic_read(&server->usage)<=0)
+		BUG();
+
+	/* to prevent a race, the decrement and the dequeue must be effectively atomic */
+	write_lock(&cell->sv_lock);
+
+	if (likely(!atomic_dec_and_test(&server->usage))) {
+		write_unlock(&cell->sv_lock);
+		_leave("");
+		return;
+	}
+
+	spin_lock(&cell->sv_gylock);
+	list_del(&server->link);
+	list_add_tail(&server->link,&cell->sv_graveyard);
+
+	/* time out in 10 secs */
+	afs_kafstimod_add_timer(&server->timeout,10*HZ);
+
+	spin_unlock(&cell->sv_gylock);
+	write_unlock(&cell->sv_lock);
+
+	_leave(" [killed]");
+} /* end afs_put_server() */
+
+/*****************************************************************************/
+/*
+ * timeout server record
+ * - removes from the cell's graveyard and destroys it if the usage count is still zero
+ */
+void afs_server_do_timeout(afs_server_t *server)
+{
+	struct rxrpc_peer *peer;
+	afs_cell_t *cell;
+	int loop;
+
+	_enter("%p",server);
+
+	cell = server->cell;
+
+	if (atomic_read(&server->usage)<0) BUG();
+
+	/* remove from graveyard if still dead - sv_gylock is the lock guarding sv_graveyard */
+	spin_lock(&cell->sv_gylock);
+	if (atomic_read(&server->usage)==0)
+		list_del_init(&server->link);
+	else
+		server = NULL;
+	spin_unlock(&cell->sv_gylock);
+
+	if (!server) {
+		_leave("");
+		return; /* resurrected */
+	}
+
+	/* we can now destroy it properly */
+	afs_put_cell(cell);
+
+	/* uncross-point the structs under a global lock */
+	spin_lock(&afs_server_peer_lock);
+	peer = server->peer;
+	if (peer) {
+		server->peer = NULL;
+		peer->user = NULL;
+	}
+	spin_unlock(&afs_server_peer_lock);
+
+	/* finish cleaning up the server */
+	for (loop=AFS_SERVER_CONN_LIST_SIZE-1; loop>=0; loop--)
+		if (server->fs_conn[loop])
+			rxrpc_put_connection(server->fs_conn[loop]);
+
+	if (server->vlserver)
+		rxrpc_put_connection(server->vlserver);
+
+	kfree(server);
+
+	_leave(" [destroyed]");
+} /* end afs_server_do_timeout() */
+
+/*****************************************************************************/
+/*
+ * get a callslot on a connection to the fileserver on the specified server
+ * - returns 0 with callslot->conn and callslot->nconn set, else an error code
+ */
+int afs_server_request_callslot(afs_server_t *server, struct afs_server_callslot *callslot)
+{
+	struct afs_server_callslot *pcallslot;
+	int nconn, ret;
+
+	_enter("%p,",server);
+
+	INIT_LIST_HEAD(&callslot->link);
+	callslot->task = current;
+	callslot->conn = NULL;
+	callslot->nconn = -1;
+	callslot->ready = 0;
+
+	ret = 0;
+
+	/* get hold of a callslot first */
+	spin_lock(&server->fs_lock);
+
+	/* resurrect the server if its death timeout has expired */
+	if (server->fs_state) {
+		if (time_before(jiffies,server->fs_dead_jif)) {
+			ret = server->fs_state;
+			spin_unlock(&server->fs_lock);
+			_leave(" = %d [still dead]",ret);
+			return ret;
+		}
+
+		server->fs_state = 0;
+	}
+
+	/* try and find a connection that has spare callslots */
+	for (nconn=0; nconn<AFS_SERVER_CONN_LIST_SIZE; nconn++) {
+		if (server->fs_conn_cnt[nconn]>0) {
+			server->fs_conn_cnt[nconn]--;
+			spin_unlock(&server->fs_lock);
+			callslot->nconn = nconn;
+			goto obtained_slot;
+		}
+	}
+
+	/* none were available - wait interruptibly for one to become available */
+	set_current_state(TASK_INTERRUPTIBLE);
+	list_add_tail(&callslot->link,&server->fs_callq);
+	spin_unlock(&server->fs_lock);
+
+	while (!callslot->ready && !signal_pending(current)) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	set_current_state(TASK_RUNNING);
+
+	/* even if we were interrupted we may still be queued */
+	if (!callslot->ready) {
+		spin_lock(&server->fs_lock);
+		list_del_init(&callslot->link);
+		spin_unlock(&server->fs_lock);
+	}
+
+	nconn = callslot->nconn;
+
+	/* if interrupted, we must release any slot we also got before returning an error */
+	if (signal_pending(current)) {
+		ret = -EINTR;
+		goto error_release;
+	}
+
+	/* if we were woken up with an error, then pass that error back to the caller */
+	if (nconn<0) {
+		_leave(" = %d",callslot->errno);
+		return callslot->errno;
+	}
+
+	/* were we given a connection directly? */
+	if (callslot->conn) {
+		/* yes - use it */
+		_leave(" = 0 (nc=%d)",nconn);
+		return 0;
+	}
+
+	/* got a callslot, but no connection */
+ obtained_slot:
+
+	/* need to get hold of the RxRPC connection */
+	down_write(&server->sem);
+
+	/* quick check to see if there's an outstanding error */
+	ret = server->fs_state;
+	if (ret)
+		goto error_release_upw;
+
+	if (server->fs_conn[nconn]) {
+		/* reuse an existing connection */
+		rxrpc_get_connection(server->fs_conn[nconn]);
+		callslot->conn = server->fs_conn[nconn];
+	}
+	else {
+		/* create a new connection in this slot and take a ref on it for the caller */
+		ret = rxrpc_create_connection(afs_transport,
+					      htons(7000),
+					      server->addr.s_addr,
+					      FS_SERVICE_ID,
+					      NULL,
+					      &server->fs_conn[nconn]);
+
+		if (ret<0)
+			goto error_release_upw;
+
+		callslot->conn = server->fs_conn[nconn];
+		rxrpc_get_connection(callslot->conn);
+	}
+
+	up_write(&server->sem);
+
+	_leave(" = 0");
+	return 0;
+
+	/* handle an error occurring */
+ error_release_upw:
+	up_write(&server->sem);
+
+ error_release:
+	/* either release the callslot or pass it along to another deserving task */
+	spin_lock(&server->fs_lock);
+
+	if (nconn<0) {
+		/* no callslot allocated - nothing to hand on, but the lock must still be dropped */
+		spin_unlock(&server->fs_lock);
+	}
+	else if (list_empty(&server->fs_callq)) {
+		/* no one waiting */
+		server->fs_conn_cnt[nconn]++;
+		spin_unlock(&server->fs_lock);
+	}
+	else {
+		/* someone's waiting - dequeue them and wake them up */
+		pcallslot = list_entry(server->fs_callq.next,struct afs_server_callslot,link);
+		list_del_init(&pcallslot->link);
+
+		pcallslot->errno = server->fs_state;
+		if (!pcallslot->errno) {
+			/* pass them our callslot details */
+			callslot->conn = xchg(&pcallslot->conn,callslot->conn);
+			pcallslot->nconn = nconn;
+			callslot->nconn = nconn = -1;
+		}
+		pcallslot->ready = 1;
+		wake_up_process(pcallslot->task);
+		spin_unlock(&server->fs_lock);
+	}
+
+	if (callslot->conn) rxrpc_put_connection(callslot->conn);
+	callslot->conn = NULL;
+
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_server_request_callslot() */
+
+/*****************************************************************************/
+/*
+ * release a callslot back to the server
+ * - transfers the RxRPC connection to the next pending callslot if possible
+ */
+void afs_server_release_callslot(afs_server_t *server, struct afs_server_callslot *callslot)
+{
+	struct afs_server_callslot *pcallslot;
+
+	/* validate the slot number first: the trace below uses it as an array index */
+	if (callslot->nconn<0) BUG();
+	_enter("{ad=%08x,cnt=%u},{%d}",
+	       ntohl(server->addr.s_addr),
+	       server->fs_conn_cnt[callslot->nconn],
+	       callslot->nconn);
+
+	spin_lock(&server->fs_lock);
+
+	if (list_empty(&server->fs_callq)) {
+		/* no one waiting */
+		server->fs_conn_cnt[callslot->nconn]++;
+		spin_unlock(&server->fs_lock);
+	}
+	else {
+		/* someone's waiting - dequeue them and wake them up */
+		pcallslot = list_entry(server->fs_callq.next,struct afs_server_callslot,link);
+		list_del_init(&pcallslot->link);
+
+		pcallslot->errno = server->fs_state;
+		if (!pcallslot->errno) {
+			/* pass them our callslot details */
+			callslot->conn = xchg(&pcallslot->conn,callslot->conn);
+			pcallslot->nconn = callslot->nconn;
+			callslot->nconn = -1;
+		}
+
+		pcallslot->ready = 1;
+		wake_up_process(pcallslot->task);
+		spin_unlock(&server->fs_lock);
+	}
+
+	if (callslot->conn) rxrpc_put_connection(callslot->conn);
+
+	_leave("");
+} /* end afs_server_release_callslot() */
+
+/*****************************************************************************/
+/*
+ * get a handle to a connection to the vlserver (volume location) on the specified server
+ */
+int afs_server_get_vlconn(afs_server_t *server, struct rxrpc_connection **_conn)
+{
+	struct rxrpc_connection *conn;
+	int ret;
+
+	_enter("%p,",server);
+
+	ret = 0;
+	conn = NULL;
+	down_read(&server->sem);
+
+	if (server->vlserver) {
+		/* reuse an existing connection */
+		rxrpc_get_connection(server->vlserver);
+		conn = server->vlserver;
+		up_read(&server->sem);
+	}
+	else {
+		/* create a new connection */
+		up_read(&server->sem);
+		down_write(&server->sem);
+		if (!server->vlserver) {
+			ret = rxrpc_create_connection(afs_transport,
+						      htons(7003),
+						      server->addr.s_addr,
+						      VL_SERVICE_ID,
+						      NULL,
+						      &server->vlserver);
+		}
+		if (ret==0) {
+			rxrpc_get_connection(server->vlserver);
+			conn = server->vlserver;
+		}
+		up_write(&server->sem);
+	}
+
+	*_conn = conn;
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_server_get_vlconn() */
diff --git a/fs/afs/server.h b/fs/afs/server.h
new file mode 100644
index 000000000000..feddacf2c954
--- /dev/null
+++ b/fs/afs/server.h
@@ -0,0 +1,97 @@
+/* server.h: AFS server record
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_SERVER_H
+#define _LINUX_AFS_SERVER_H
+
+#include "types.h"
+#include "kafstimod.h"
+#include <rxrpc/peer.h>
+#include <linux/rwsem.h>
+
+extern spinlock_t afs_server_peer_lock;
+
+/*****************************************************************************/
+/*
+ * AFS server record
+ */
+struct afs_server
+{
+	atomic_t		usage;
+	afs_cell_t		*cell;		/* cell in which server resides */
+	struct list_head	link;		/* link in cell's server list */
+	struct rw_semaphore	sem;		/* access lock */
+	afs_timer_t		timeout;	/* graveyard timeout */
+	struct in_addr		addr;		/* server address */
+	struct rxrpc_peer	*peer;		/* peer record for this server */
+	struct rxrpc_connection	*vlserver;	/* connection to the volume location service */
+
+	/* file service access */
+#define AFS_SERVER_CONN_LIST_SIZE 2
+	struct rxrpc_connection	*fs_conn[AFS_SERVER_CONN_LIST_SIZE]; /* FS connections */
+	unsigned		fs_conn_cnt[AFS_SERVER_CONN_LIST_SIZE];	/* per conn call count */
+	struct list_head	fs_callq;	/* queue of processes waiting to make a call */
+	spinlock_t		fs_lock;	/* access lock */
+	int			fs_state;      	/* 0 or reason FS currently marked dead (-errno) */
+	unsigned		fs_rtt;		/* FS round trip time */
+	unsigned long		fs_act_jif;	/* time at which last activity occurred */
+	unsigned long		fs_dead_jif;	/* time at which no longer to be considered dead */
+
+	/* callback promise management */
+	struct list_head	cb_promises;	/* as yet unbroken promises from this server */
+	spinlock_t		cb_lock;	/* access lock */
+};
+
+extern int afs_server_lookup(afs_cell_t *cell, const struct in_addr *addr, afs_server_t **_server);
+
+#define afs_get_server(S) do { atomic_inc(&(S)->usage); } while(0)
+
+extern void afs_put_server(afs_server_t *server);
+extern void afs_server_do_timeout(afs_server_t *server);
+
+extern int afs_server_find_by_peer(const struct rxrpc_peer *peer, afs_server_t **_server);
+
+extern int afs_server_get_vlconn(afs_server_t *server, struct rxrpc_connection **_conn);
+
+static inline afs_server_t *afs_server_get_from_peer(struct rxrpc_peer *peer)
+{
+	afs_server_t *server;
+
+	spin_lock(&afs_server_peer_lock);
+	server = peer->user;
+	if (server)
+		afs_get_server(server);
+	spin_unlock(&afs_server_peer_lock);
+
+	return server;
+}
+
+/*****************************************************************************/
+/*
+ * AFS server callslot grant record
+ */
+struct afs_server_callslot
+{
+	struct list_head	link;		/* link in server's list */
+	struct task_struct	*task;		/* process waiting to make call */
+	struct rxrpc_connection	*conn;		/* connection to use (or NULL on error) */
+	short			nconn;		/* connection slot number (-1 on error) */
+	char			ready;		/* T when ready */
+	int			errno;		/* error number if nconn==-1 */
+};
+
+extern int afs_server_request_callslot(afs_server_t *server,
+				       struct afs_server_callslot *callslot);
+
+extern void afs_server_release_callslot(afs_server_t *server,
+					struct afs_server_callslot *callslot);
+
+#endif /* _LINUX_AFS_SERVER_H */
diff --git a/fs/afs/super.c b/fs/afs/super.c
new file mode 100644
index 000000000000..18056534b504
--- /dev/null
+++ b/fs/afs/super.c
@@ -0,0 +1,595 @@
+/*
+ * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Howells <dhowells@redhat.com>
+ *          David Woodhouse <dwmw2@cambridge.redhat.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "vnode.h"
+#include "volume.h"
+#include "cell.h"
+#include "cmservice.h"
+#include "fsclient.h"
+#include "super.h"
+#include "internal.h"
+
+#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
+
+static inline char *strdup(const char *s)
+{
+	char *ns = kmalloc(strlen(s)+1,GFP_KERNEL);
+	if (ns)
+		strcpy(ns,s);
+	return ns;
+}
+
+static void afs_i_init_once(void *foo, kmem_cache_t *cachep, unsigned long flags);
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static struct super_block *afs_get_sb(struct file_system_type *fs_type,
+				      int flags, char *dev_name, void *data);
+#else
+static struct super_block *afs_read_super(struct super_block *sb, void *data, int);
+#endif
+
+static struct inode *afs_alloc_inode(struct super_block *sb);
+
+static void afs_put_super(struct super_block *sb);
+
+static void afs_destroy_inode(struct inode *inode);
+
+static struct file_system_type afs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "afs",
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	.get_sb		= afs_get_sb,
+	.kill_sb	= kill_anon_super,
+#else
+	.read_super	= afs_read_super,
+#endif
+};
+
+static struct super_operations afs_super_ops = {
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	.statfs		= simple_statfs,
+	.alloc_inode	= afs_alloc_inode,
+	.drop_inode	= generic_delete_inode,
+	.destroy_inode	= afs_destroy_inode,
+#else
+	.read_inode2	= afs_read_inode2,
+#endif
+	.clear_inode	= afs_clear_inode,
+	.put_super	= afs_put_super,
+};
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static kmem_cache_t *afs_inode_cachep;
+#endif
+
+/*****************************************************************************/
+/*
+ * initialise the filesystem
+ */
+int __init afs_fs_init(void)
+{
+	int ret;
+
+	kenter("");
+
+	/* open the cache */
+#if 0
+	ret = -EINVAL;
+	if (!cachedev) {
+		printk(KERN_NOTICE "kAFS: No cache device specified as module parm\n");
+		printk(KERN_NOTICE "kAFS: Set with \"cachedev=<devname>\" on insmod's cmdline\n");
+		return ret;
+	}
+
+	ret = afs_cache_open(cachedev,&afs_cache);
+	if (ret<0) {
+		printk(KERN_NOTICE "kAFS: Failed to open cache device\n");
+		return ret;
+	}
+#endif
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	/* create ourselves an inode cache */
+	ret = -ENOMEM;
+	afs_inode_cachep = kmem_cache_create("afs_inode_cache",
+						sizeof(afs_vnode_t),
+						0,
+						SLAB_HWCACHE_ALIGN,
+						afs_i_init_once,
+						NULL);
+	if (!afs_inode_cachep) {
+		printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n");
+#if 0
+		afs_put_cache(afs_cache);
+#endif
+		return ret;
+	}
+#endif
+
+	/* now export our filesystem to lesser mortals */
+	ret = register_filesystem(&afs_fs_type);
+	if (ret<0) {
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+		kmem_cache_destroy(afs_inode_cachep);
+#endif
+#if 0
+		afs_put_cache(afs_cache);
+#endif
+		kleave(" = %d",ret);
+		return ret;
+	}
+
+	kleave(" = 0");
+	return 0;
+} /* end afs_fs_init() */
+
+/*****************************************************************************/
+/*
+ * clean up the filesystem
+ */
+void __exit afs_fs_exit(void)
+{
+	/* destroy our private inode cache */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	kmem_cache_destroy(afs_inode_cachep);
+#endif
+
+	unregister_filesystem(&afs_fs_type);
+
+#if 0
+	if (afs_cache)
+		afs_put_cache(afs_cache);
+#endif
+} /* end afs_fs_exit() */
+
+/*****************************************************************************/
+/*
+ * check that an argument has a value
+ */
+static int want_arg(char **_value, const char *option)
+{
+	if (!_value || !*_value || !**_value) {
+		printk(KERN_NOTICE "kAFS: %s: argument missing\n",option);
+		return 0;
+	}
+	return 1;
+} /* end want_arg() */
+
+/*****************************************************************************/
+/*
+ * check that there is a value
+ */
+#if 0
+static int want_value(char **_value, const char *option)
+{
+	if (!_value || !*_value || !**_value) {
+		printk(KERN_NOTICE "kAFS: %s: argument incomplete\n",option);
+		return 0;
+	}
+	return 1;
+} /* end want_value() */
+#endif
+
+/*****************************************************************************/
+/*
+ * check that there's no subsequent value
+ */
+static int want_no_value(char *const *_value, const char *option)
+{
+	if (*_value && **_value) {
+		printk(KERN_NOTICE "kAFS: %s: Invalid argument: %s\n",option,*_value);
+		return 0;
+	}
+	return 1;
+} /* end want_no_value() */
+
+/*****************************************************************************/
+/*
+ * extract a number from an option string value
+ */
+#if 0
+static int want_number(char **_value, const char *option, unsigned long *number,
+		       unsigned long limit)
+{
+	char *value = *_value;
+
+	if (!want_value(_value,option))
+		return 0;
+
+	*number = simple_strtoul(value,_value,0);
+
+	if (value==*_value) {
+		printk(KERN_NOTICE "kAFS: %s: Invalid number: %s\n",option,value);
+		return 0;
+	}
+
+	if (*number>limit) {
+		printk(KERN_NOTICE "kAFS: %s: numeric value %lu > %lu\n",option,*number,limit);
+		return 0;
+	}
+
+	return 1;
+} /* end want_number() */
+#endif
+
+/*****************************************************************************/
+/*
+ * extract a separator from an option string value
+ */
+#if 0
+static int want_sep(char **_value, const char *option, char sep)
+{
+	if (!want_value(_value,option))
+		return 0;
+
+	if (*(*_value)++ != sep) {
+		printk(KERN_NOTICE "kAFS: %s: '%c' expected: %s\n",option,sep,*_value-1);
+		return 0;
+	}
+
+	return 1;
+} /* end want_sep() */
+#endif
+
+/*****************************************************************************/
+/*
+ * extract an IP address from an option string value
+ */
+#if 0
+static int want_ipaddr(char **_value, const char *option, struct in_addr *addr)
+{
+	unsigned long number[4];
+
+	if (!want_value(_value,option))
+		return 0;
+
+	if (!want_number(_value,option,&number[0],255) ||
+	    !want_sep(_value,option,'.') ||
+	    !want_number(_value,option,&number[1],255) ||
+	    !want_sep(_value,option,'.') ||
+	    !want_number(_value,option,&number[2],255) ||
+	    !want_sep(_value,option,'.') ||
+	    !want_number(_value,option,&number[3],255))
+		return 0;
+
+	((u8*)addr)[0] = number[0];
+	((u8*)addr)[1] = number[1];
+	((u8*)addr)[2] = number[2];
+	((u8*)addr)[3] = number[3];
+
+	return 1;
+} /* end want_ipaddr() */
+#endif
+
+/*****************************************************************************/
+/*
+ * parse the mount options
+ * - this function has been shamelessly adapted from the ext3 fs which shamelessly adapted it from
+ *   the msdos fs
+ */
+static int afs_super_parse_options(struct afs_super_info *as, char *options, char **devname)
+{
+	char *key, *value;
+	int ret;
+
+	_enter("%s",options);
+
+	ret = 0;
+	while ((key = strsep(&options,",")))
+	{
+		value = strchr(key,'=');
+		if (value)
+			*value++ = 0;
+
+		printk("kAFS: KEY: %s, VAL:%s\n",key,value?:"-");
+
+		if (strcmp(key,"rwpath")==0) {
+			if (!want_no_value(&value,"rwpath")) return -EINVAL;
+			as->rwparent = 1;
+			continue;
+		}
+		else if (strcmp(key,"vol")==0) {
+			if (!want_arg(&value,"vol")) return -EINVAL;
+			*devname = value;
+			continue;
+		}
+
+#if 0
+		if (strcmp(key,"servers")==0) {
+			if (!want_arg(&value,"servers")) return -EINVAL;
+
+			_debug("servers=%s",value);
+
+			for (;;) {
+				struct in_addr addr;
+
+				if (!want_ipaddr(&value,"servers",&addr))
+					return -EINVAL;
+
+				ret = afs_create_server(as->cell,&addr,&as->server);
+				if (ret<0) {
+					printk("kAFS: unable to create server: %d\n",ret);
+					return ret;
+				}
+
+				if (!*value)
+					break;
+
+				if (as->server) {
+					printk(KERN_NOTICE
+					       "kAFS: only one server can be specified\n");
+					return -EINVAL;
+				}
+
+				if (!want_sep(&value,"servers",':'))
+					return -EINVAL;
+			}
+			continue;
+		}
+#endif
+
+		printk("kAFS: Unknown mount option: '%s'\n",key);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	ret = 0;
+
+ error:
+	_leave(" = %d",ret);
+
+	return ret;
+} /* end afs_super_parse_options() */
+
+/*****************************************************************************/
+/*
+ * fill in the superblock (_data points to { device name, option string })
+ */
+static int afs_fill_super(struct super_block *sb, void *_data, int silent)
+{
+	struct afs_super_info *as = NULL;
+	struct dentry *root = NULL;
+	struct inode *inode = NULL;
+	afs_fid_t fid;
+	void **data = _data;
+	char *options, *devname;
+	int ret;
+
+	_enter("");
+
+	if (!data) {
+		_leave(" = -EINVAL");
+		return -EINVAL;
+	}
+	devname = data[0];
+	options = data[1];
+	if (options)
+		options[PAGE_SIZE-1] = 0;
+
+	/* allocate a superblock info record */
+	as = kmalloc(sizeof(struct afs_super_info),GFP_KERNEL);
+	if (!as) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	memset(as,0,sizeof(struct afs_super_info));
+
+	/* parse the options */
+	if (options) {
+		ret = afs_super_parse_options(as,options,&devname);
+		if (ret<0)
+			goto error;
+	}
+	if (!devname) { /* no device name and no "vol=" option either */
+		printk("kAFS: no volume name specified\n");
+		ret = -EINVAL;
+		goto error;
+	}
+
+	/* parse the device name */
+	ret = afs_volume_lookup(devname,as->rwparent,&as->volume);
+	if (ret<0)
+		goto error;
+
+	/* fill in the superblock */
+	sb->s_blocksize		= PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits	= PAGE_CACHE_SHIFT;
+	sb->s_magic		= AFS_FS_MAGIC;
+	sb->s_op		= &afs_super_ops;
+	sb->s_fs_info		= as;
+
+	/* allocate the root inode and dentry */
+	fid.vid		= as->volume->vid;
+	fid.vnode	= 1;
+	fid.unique	= 1;
+	ret = afs_iget(sb,&fid,&inode);
+	if (ret<0)
+		goto error;
+
+	ret = -ENOMEM;
+	root = d_alloc_root(inode);
+	if (!root)
+		goto error;
+
+	sb->s_root = root;
+
+	_leave(" = 0");
+	return 0;
+
+ error:
+	if (root) dput(root);
+	if (inode) iput(inode);
+	if (as) {
+		if (as->volume)		afs_put_volume(as->volume);
+		kfree(as);
+	}
+	sb->s_fs_info = NULL;
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_fill_super() */
+
+/*****************************************************************************/
+/*
+ * get an AFS superblock
+ * - TODO: don't use get_sb_nodev(), but rather call sget() directly
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static struct super_block *afs_get_sb(struct file_system_type *fs_type,
+				      int flags,
+				      char *dev_name,
+				      void *options)
+{
+	struct super_block *sb;
+	void *data[2] = { dev_name, options };
+	int ret;
+
+	_enter(",,%s,%p",dev_name,options);
+
+	/* start the cache manager */
+	ret = afscm_start();
+	if (ret<0) {
+		_leave(" = %d",ret);
+		return ERR_PTR(ret);
+	}
+
+	/* allocate a deviceless superblock */
+	sb = get_sb_nodev(fs_type,flags,data,afs_fill_super);
+	if (IS_ERR(sb)) {
+		afscm_stop();
+		return sb;
+	}
+
+	_leave("");
+	return sb;
+} /* end afs_get_sb() */
+#endif
+
+/*****************************************************************************/
+/*
+ * read an AFS superblock
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+static struct super_block *afs_read_super(struct super_block *sb, void *options, int silent)
+{
+	void *data[2] = { NULL, options };
+	int ret;
+
+	_enter(",,%s",(char*)options);
+
+	/* start the cache manager */
+	ret = afscm_start();
+	if (ret<0) {
+		_leave(" = NULL (%d)",ret);
+		return NULL;
+	}
+
+	/* allocate a deviceless superblock */
+	ret = afs_fill_super(sb,data,silent);
+	if (ret<0) {
+		afscm_stop();
+		_leave(" = NULL (%d)",ret);
+		return NULL;
+	}
+
+	_leave(" = %p",sb);
+	return sb;
+} /* end afs_read_super() */
+#endif
+
+/*****************************************************************************/
+/*
+ * finish the unmounting process on the superblock
+ */
+static void afs_put_super(struct super_block *sb)
+{
+	struct afs_super_info *as = sb->s_fs_info;
+
+	_enter("");
+
+	if (as) {
+		if (as->volume)		afs_put_volume(as->volume);
+		kfree(as);	/* the record kmalloc'd by afs_fill_super() */
+		sb->s_fs_info = NULL;
+	}
+	afscm_stop();	/* stop the cache manager */
+
+	_leave("");
+} /* end afs_put_super() */
+
+/*****************************************************************************/
+/*
+ * initialise an inode cache slab element prior to any use
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static void afs_i_init_once(void *_vnode, kmem_cache_t *cachep, unsigned long flags)
+{
+	afs_vnode_t *vnode = (afs_vnode_t *) _vnode;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) {
+		memset(vnode,0,sizeof(*vnode));
+		inode_init_once(&vnode->vfs_inode);
+		init_waitqueue_head(&vnode->update_waitq);
+		spin_lock_init(&vnode->lock);
+		INIT_LIST_HEAD(&vnode->cb_link);
+		INIT_LIST_HEAD(&vnode->cb_hash_link);
+		afs_timer_init(&vnode->cb_timeout,&afs_vnode_cb_timed_out_ops);
+	}
+
+} /* end afs_i_init_once() */
+#endif
+
+/*****************************************************************************/
+/*
+ * allocate an AFS inode struct from our slab cache
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static struct inode *afs_alloc_inode(struct super_block *sb)
+{
+	afs_vnode_t *vnode;
+
+	vnode = (afs_vnode_t *) kmem_cache_alloc(afs_inode_cachep,SLAB_KERNEL);
+	if (!vnode)
+		return NULL;
+
+	memset(&vnode->fid,0,sizeof(vnode->fid));
+	memset(&vnode->status,0,sizeof(vnode->status));
+
+	vnode->volume = NULL;
+	vnode->update_cnt = 0;
+	vnode->flags = 0;
+
+	return &vnode->vfs_inode;
+} /* end afs_alloc_inode() */
+#endif
+
+/*****************************************************************************/
+/*
+ * destroy an AFS inode struct
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+static void afs_destroy_inode(struct inode *inode)
+{
+	_enter("{%lu}",inode->i_ino);
+	kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode));
+} /* end afs_destroy_inode() */
+#endif
diff --git a/fs/afs/super.h b/fs/afs/super.h
new file mode 100644
index 000000000000..b307b0884181
--- /dev/null
+++ b/fs/afs/super.h
@@ -0,0 +1,43 @@
+/* super.h: AFS filesystem internal private data
+ *
+ * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
+ *          David Howells <dhowells@redhat.com>
+ *
+ */
+
+#ifndef _LINUX_AFS_SUPER_H
+#define _LINUX_AFS_SUPER_H
+
+#include <linux/fs.h>
+#include "server.h"
+
+#ifdef __KERNEL__
+
+/*****************************************************************************/
+/*
+ * AFS superblock private data
+ * - there's one superblock per volume
+ */
+struct afs_super_info
+{
+	afs_volume_t		*volume;	/* volume record */
+	char			rwparent;	/* T if parent is R/W AFS volume */
+};
+
+static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_AFS_SUPER_H */
diff --git a/fs/afs/transport.h b/fs/afs/transport.h
new file mode 100644
index 000000000000..7013ae6ccc8c
--- /dev/null
+++ b/fs/afs/transport.h
@@ -0,0 +1,21 @@
+/* transport.h: AFS transport management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_TRANSPORT_H
+#define _LINUX_AFS_TRANSPORT_H
+
+#include "types.h"
+#include <rxrpc/transport.h>
+
+/* the cache manager transport endpoint */
+extern struct rxrpc_transport *afs_transport;
+
+#endif /* _LINUX_AFS_TRANSPORT_H */
diff --git a/fs/afs/types.h b/fs/afs/types.h
new file mode 100644
index 000000000000..411925f4fa04
--- /dev/null
+++ b/fs/afs/types.h
@@ -0,0 +1,152 @@
+/* types.h: AFS types
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_TYPES_H
+#define _LINUX_AFS_TYPES_H
+
+#ifdef __KERNEL__
+#include <rxrpc/types.h>
+#endif /* __KERNEL__ */
+
+typedef unsigned			afs_volid_t;
+typedef unsigned			afs_vnodeid_t;
+typedef unsigned long long		afs_dataversion_t;
+
+typedef struct afs_async_op		afs_async_op_t;
+typedef struct afs_callback		afs_callback_t;
+typedef struct afs_cell			afs_cell_t;
+typedef struct afs_fid			afs_fid_t;
+typedef struct afs_file_status		afs_file_status_t;
+typedef struct afs_server		afs_server_t;
+typedef struct afs_timer		afs_timer_t;
+typedef struct afs_vlocation		afs_vlocation_t;
+typedef struct afs_vnode		afs_vnode_t;
+typedef struct afs_volsync		afs_volsync_t;
+typedef struct afs_volume		afs_volume_t;
+typedef struct afs_volume_info		afs_volume_info_t;
+
+typedef struct afsc_cache		afsc_cache_t;
+typedef struct afsc_cache_cell		afsc_cache_cell_t;
+typedef struct afsc_cache_vldb		afsc_cache_vldb_t;
+typedef struct afsc_cell_record		afsc_cell_record_t;
+typedef struct afsc_inode		afsc_inode_t;
+typedef struct afsc_io			afsc_io_t;
+typedef struct afsc_io_subop		afsc_io_subop_t;
+typedef struct afsc_io_queue		afsc_io_queue_t;
+typedef struct afsc_super_block		afsc_super_block_t;
+typedef struct afsc_vldb_record		afsc_vldb_record_t;
+typedef struct afsc_vnode_catalogue	afsc_vnode_catalogue_t;
+typedef struct afsc_vnode_meta		afsc_vnode_meta_t;
+
+typedef struct afsvl_dbentry		afsvl_dbentry_t;
+
+typedef enum {
+	AFSVL_RWVOL,			/* read/write volume */
+	AFSVL_ROVOL,			/* read-only volume */
+	AFSVL_BACKVOL,			/* backup volume */
+} afs_voltype_t;
+
+extern const char *afs_voltypes[];
+
+typedef enum {
+	AFS_FTYPE_INVALID	= 0,
+	AFS_FTYPE_FILE		= 1,
+	AFS_FTYPE_DIR		= 2,
+	AFS_FTYPE_SYMLINK	= 3,
+} afs_file_type_t;
+
+#ifdef __KERNEL__
+
+/*****************************************************************************/
+/*
+ * AFS file identifier
+ */
+struct afs_fid
+{
+	afs_volid_t	vid;		/* volume ID */
+	afs_vnodeid_t	vnode;		/* file index within volume */
+	unsigned	unique;		/* unique ID number (file index version) */
+};
+
+/*****************************************************************************/
+/*
+ * AFS callback notification
+ */
+typedef enum {
+	AFSCM_CB_UNTYPED	= 0,	/* no type set on CB break */
+	AFSCM_CB_EXCLUSIVE	= 1,	/* CB exclusive to CM [not implemented] */
+	AFSCM_CB_SHARED		= 2,	/* CB shared by other CM's */
+	AFSCM_CB_DROPPED	= 3,	/* CB promise cancelled by file server */
+} afs_callback_type_t;
+
+struct afs_callback
+{
+	afs_server_t		*server;	/* server that made the promise */
+	afs_fid_t		fid;		/* file identifier */
+	unsigned		version;	/* callback version */
+	unsigned		expiry;		/* time at which expires */
+	afs_callback_type_t	type;		/* type of callback */
+};
+
+#define AFSCBMAX 50
+
+/*****************************************************************************/
+/*
+ * AFS volume information
+ */
+struct afs_volume_info
+{
+	afs_volid_t		vid;		/* volume ID */
+	afs_voltype_t		type;		/* type of this volume */
+	afs_volid_t		type_vids[5];	/* volume ID's for possible types for this vol */
+	
+	/* list of fileservers serving this volume */
+	size_t			nservers;	/* number of entries used in servers[] */
+	struct {
+		struct in_addr	addr;		/* fileserver address */
+	} servers[8];
+};
+
+/*****************************************************************************/
+/*
+ * AFS file status information
+ */
+struct afs_file_status
+{
+	unsigned		if_version;	/* interface version */
+#define AFS_FSTATUS_VERSION	1
+
+	afs_file_type_t		type;		/* file type */
+	unsigned		nlink;		/* link count */
+	size_t			size;		/* file size */
+	afs_dataversion_t	version;	/* current data version */
+	unsigned		author;		/* author ID */
+	unsigned		owner;		/* owner ID */
+	unsigned		caller_access;	/* access rights for authenticated caller */
+	unsigned		anon_access;	/* access rights for unauthenticated caller */
+	umode_t			mode;		/* UNIX mode */
+	afs_fid_t		parent;		/* parent file ID */
+	time_t			mtime_client;	/* last time client changed data */
+	time_t			mtime_server;	/* last time server changed data */
+};
+
+/*****************************************************************************/
+/*
+ * AFS volume synchronisation information
+ */
+struct afs_volsync
+{
+	time_t			creation;	/* volume creation time */
+};
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_AFS_TYPES_H */
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
new file mode 100644
index 000000000000..564e9939af40
--- /dev/null
+++ b/fs/afs/vlclient.c
@@ -0,0 +1,662 @@
+/* vlclient.c: AFS Volume Location Service client
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include "server.h"
+#include "volume.h"
+#include "vlclient.h"
+#include "kafsasyncd.h"
+#include "kafstimod.h"
+#include "errors.h"
+#include "internal.h"
+
+#define VLGETENTRYBYID		503	/* AFS Get Cache Entry By ID operation ID */
+#define VLGETENTRYBYNAME	504	/* AFS Get Cache Entry By Name operation ID */
+#define VLPROBE			514	/* AFS Probe Volume Location Service operation ID */
+
+static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call);
+static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call);
+
+/*****************************************************************************/
+/*
+ * map afs VL abort codes to/from Linux error codes
+ * - called with call->lock held
+ */
+static void afs_rxvl_aemap(struct rxrpc_call *call)
+{
+	int err;
+
+	_enter("{%u,%u,%d}",call->app_err_state,call->app_abort_code,call->app_errno);
+
+	switch (call->app_err_state) {
+	case RXRPC_ESTATE_LOCAL_ABORT:
+		call->app_abort_code = -call->app_errno;	/* abort code is the call's errno with its sign flipped */
+		return;
+
+	case RXRPC_ESTATE_PEER_ABORT:
+		switch (call->app_abort_code) {	/* known VL abort codes are mapped here; the rest go to afs_abort_to_error() */
+		case AFSVL_IDEXIST:		err = -EEXIST;		break;
+		case AFSVL_IO:			err = -EREMOTEIO;	break;
+		case AFSVL_NAMEEXIST:		err = -EEXIST;		break;
+		case AFSVL_CREATEFAIL:		err = -EREMOTEIO;	break;
+		case AFSVL_NOENT:		err = -ENOMEDIUM;	break;
+		case AFSVL_EMPTY:		err = -ENOMEDIUM;	break;
+		case AFSVL_ENTDELETED:		err = -ENOMEDIUM;	break;
+		case AFSVL_BADNAME:		err = -EINVAL;		break;
+		case AFSVL_BADINDEX:		err = -EINVAL;		break;
+		case AFSVL_BADVOLTYPE:		err = -EINVAL;		break;
+		case AFSVL_BADSERVER:		err = -EINVAL;		break;
+		case AFSVL_BADPARTITION:	err = -EINVAL;		break;
+		case AFSVL_REPSFULL:		err = -EFBIG;		break;
+		case AFSVL_NOREPSERVER:		err = -ENOENT;		break;
+		case AFSVL_DUPREPSERVER:	err = -EEXIST;		break;
+		case AFSVL_RWNOTFOUND:		err = -ENOENT;		break;
+		case AFSVL_BADREFCOUNT:		err = -EINVAL;		break;
+		case AFSVL_SIZEEXCEEDED:	err = -EINVAL;		break;
+		case AFSVL_BADENTRY:		err = -EINVAL;		break;
+		case AFSVL_BADVOLIDBUMP:	err = -EINVAL;		break;
+		case AFSVL_IDALREADYHASHED:	err = -EINVAL;		break;
+		case AFSVL_ENTRYLOCKED:		err = -EBUSY;		break;
+		case AFSVL_BADVOLOPER:		err = -EBADRQC;		break;
+		case AFSVL_BADRELLOCKTYPE:	err = -EINVAL;		break;
+		case AFSVL_RERELEASE:		err = -EREMOTEIO;	break;
+		case AFSVL_BADSERVERFLAG:	err = -EINVAL;		break;
+		case AFSVL_PERM:		err = -EACCES;		break;
+		case AFSVL_NOMEM:		err = -EREMOTEIO;	break;
+		default:
+			err = afs_abort_to_error(call->app_abort_code);
+			break;
+		}
+		call->app_errno = err;	/* record the mapped errno on the call */
+		return;
+
+	default:	/* any other error state: the call's codes are left untouched */
+		return;
+	}
+} /* end afs_rxvl_aemap() */
+
+/*****************************************************************************/
+/*
+ * probe a volume location server to see if it is still alive (0 if it replied, otherwise an error code)
+ */
+int afs_rxvl_probe(afs_server_t *server, int alloc_flags)
+{
+	DECLARE_WAITQUEUE(myself,current);	/* wait queue entry for the calling task */
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	size_t sent;
+	int ret;
+	u32 param[1];
+
+	/* get hold of the vlserver connection */
+	ret = afs_server_get_vlconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxvl_aemap,&call);	/* no attn/error handlers (cf. the async lookup), only the abort-code mapper */
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = VLPROBE;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters */
+	param[0] = htonl(VLPROBE);	/* the whole request is the operation ID in network byte order */
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,alloc_flags,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (call->app_call_state!=RXRPC_CSTATE_CLNT_RCV_REPLY ||	/* stop once the call leaves the receiving-reply state... */
+		    signal_pending(current))	/* ...or a signal is pending */
+			break;
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	ret = -EINTR;	/* a pending signal aborts the call with -EINTR */
+	if (signal_pending(current))
+		goto abort;
+
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_ERROR:
+		ret = call->app_errno;
+		goto out_unwait;
+
+	case RXRPC_CSTATE_CLNT_GOT_REPLY:
+		ret = 0;
+		goto out_unwait;
+
+	default:
+		BUG();	/* any other state here is treated as a fatal logic error */
+	}
+
+ abort:
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();	/* NOTE(review): presumably returns once the abort wakes call->waitq - confirm */
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	rxrpc_put_connection(conn);
+ out:
+	return ret;
+
+} /* end afs_rxvl_probe() */
+
+/*****************************************************************************/
+/*
+ * look up a VLDB entry by name: zeroes *entry, fills it from the reply, returns 0 or an error code
+ */
+int afs_rxvl_get_entry_by_name(afs_server_t *server, const char *volname,
+			       afsc_vldb_record_t *entry)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[3];
+	unsigned tmp;
+	size_t sent;
+	int ret, loop;
+	u32 *bp, param[2], zero;
+
+	_enter(",%s,",volname);
+
+	memset(entry,0,sizeof(*entry));
+
+	/* get hold of the vlserver connection */
+	ret = afs_server_get_vlconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxvl_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = VLGETENTRYBYNAME;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters */
+	piov[1].iov_len = strlen(volname);
+	piov[1].iov_base = (char*)volname;
+
+	zero = 0;
+	piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;	/* 0-3 zero bytes padding the name to a 4-byte boundary */
+	piov[2].iov_base = &zero;
+
+	param[0] = htonl(VLGETENTRYBYNAME);
+	param[1] = htonl(piov[1].iov_len);
+
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,3,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive */
+	bp = rxrpc_call_alloc_scratch(call,384);	/* 96 32-bit words: exactly the fields unmarshalled below */
+
+	ret = rxrpc_call_read_data(call,bp,384,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0) {
+		if (ret==-ECONNABORTED) {
+			ret = call->app_errno;
+			goto out_unwait;
+		}
+		goto abort;
+	}
+
+	/* unmarshall the reply */
+	for (loop=0; loop<64; loop++)
+		entry->name[loop] = ntohl(*bp++);	/* one name character per 32-bit word */
+	bp += 2; /* final NUL, then type */
+
+	tmp = ntohl(*bp++); /* server count comes off the wire: cap it at the 8 slots read below */
+	entry->nservers = tmp > 8 ? 8 : tmp;
+
+	for (loop=0; loop<8; loop++)
+		entry->servers[loop].s_addr = *bp++;	/* addresses are kept in network byte order */
+
+	bp += 8; /* partition IDs */
+
+	for (loop=0; loop<8; loop++) {
+		tmp = ntohl(*bp++);
+		if (tmp & AFS_VLSF_RWVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RW;
+		if (tmp & AFS_VLSF_ROVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RO;
+		if (tmp & AFS_VLSF_BACKVOL) entry->srvtmask[loop] |= AFSC_VOL_STM_BAK;
+	}
+
+	entry->vid[0] = ntohl(*bp++);
+	entry->vid[1] = ntohl(*bp++);
+	entry->vid[2] = ntohl(*bp++);
+
+	bp++; /* clone ID */
+
+	tmp = ntohl(*bp++); /* flags */
+	if (tmp & AFS_VLF_RWEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RW;
+	if (tmp & AFS_VLF_ROEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RO;
+	if (tmp & AFS_VLF_BACKEXISTS) entry->vidmask |= AFSC_VOL_STM_BAK;
+
+	ret = -ENOMEDIUM;	/* an entry advertising no volume type at all is rejected */
+	if (!entry->vidmask)
+		goto abort;
+
+	/* success */
+	entry->ctime = xtime.tv_sec;
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	rxrpc_put_connection(conn);
+ out:
+	_leave(" = %d",ret);
+	return ret;
+
+ abort:
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+} /* end afs_rxvl_get_entry_by_name() */
+
+/*****************************************************************************/
+/*
+ * look up a VLDB entry by volume ID and type: zeroes *entry, fills it from the reply, returns 0 or an error code
+ */
+int afs_rxvl_get_entry_by_id(afs_server_t *server,
+			     afs_volid_t volid,
+			     afs_voltype_t voltype,
+			     afsc_vldb_record_t *entry)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	unsigned tmp;
+	size_t sent;
+	int ret, loop;
+	u32 *bp, param[3];
+
+	_enter(",%x,%d,",volid,voltype);
+
+	memset(entry,0,sizeof(*entry));
+
+	/* get hold of the vlserver connection */
+	ret = afs_server_get_vlconn(server,&conn);
+	if (ret<0)
+		goto out;
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,NULL,NULL,afs_rxvl_aemap,&call);
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		goto out_put_conn;
+	}
+	call->app_opcode = VLGETENTRYBYID;
+
+	/* we want to get event notifications from the call */
+	add_wait_queue(&call->waitq,&myself);
+
+	/* marshall the parameters */
+	param[0] = htonl(VLGETENTRYBYID);
+	param[1] = htonl(volid);
+	param[2] = htonl(voltype);
+
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0)
+		goto abort;
+
+	/* wait for the reply to completely arrive */
+	bp = rxrpc_call_alloc_scratch(call,384);	/* 96 32-bit words: exactly the fields unmarshalled below */
+
+	ret = rxrpc_call_read_data(call,bp,384,RXRPC_CALL_READ_BLOCK|RXRPC_CALL_READ_ALL);
+	if (ret<0) {
+		if (ret==-ECONNABORTED) {
+			ret = call->app_errno;
+			goto out_unwait;
+		}
+		goto abort;
+	}
+
+	/* unmarshall the reply */
+	for (loop=0; loop<64; loop++)
+		entry->name[loop] = ntohl(*bp++);	/* one name character per 32-bit word */
+	bp += 2; /* final NUL, then type */
+
+	tmp = ntohl(*bp++); /* server count comes off the wire: cap it at the 8 slots read below */
+	entry->nservers = tmp > 8 ? 8 : tmp;
+
+	for (loop=0; loop<8; loop++)
+		entry->servers[loop].s_addr = *bp++;	/* addresses are kept in network byte order */
+
+	bp += 8; /* partition IDs */
+
+	for (loop=0; loop<8; loop++) {
+		tmp = ntohl(*bp++);
+		if (tmp & AFS_VLSF_RWVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RW;
+		if (tmp & AFS_VLSF_ROVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RO;
+		if (tmp & AFS_VLSF_BACKVOL) entry->srvtmask[loop] |= AFSC_VOL_STM_BAK;
+	}
+
+	entry->vid[0] = ntohl(*bp++);
+	entry->vid[1] = ntohl(*bp++);
+	entry->vid[2] = ntohl(*bp++);
+
+	bp++; /* clone ID */
+
+	tmp = ntohl(*bp++); /* flags */
+	if (tmp & AFS_VLF_RWEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RW;
+	if (tmp & AFS_VLF_ROEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RO;
+	if (tmp & AFS_VLF_BACKEXISTS) entry->vidmask |= AFSC_VOL_STM_BAK;
+
+	ret = -ENOMEDIUM;	/* an entry advertising no volume type at all is rejected */
+	if (!entry->vidmask)
+		goto abort;
+
+#if 0 /* TODO: remove */
+	entry->nservers = 3;
+	entry->servers[0].s_addr = htonl(0xac101249);
+	entry->servers[1].s_addr = htonl(0xac101243);
+	entry->servers[2].s_addr = htonl(0xac10125b /*0xac10125b*/);
+
+	entry->srvtmask[0] = AFSC_VOL_STM_RO;
+	entry->srvtmask[1] = AFSC_VOL_STM_RO;
+	entry->srvtmask[2] = AFSC_VOL_STM_RO | AFSC_VOL_STM_RW;
+#endif
+
+	/* success */
+	entry->ctime = xtime.tv_sec;
+	ret = 0;
+
+ out_unwait:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&call->waitq,&myself);
+	rxrpc_put_call(call);
+ out_put_conn:
+	rxrpc_put_connection(conn);
+ out:
+	_leave(" = %d",ret);
+	return ret;
+
+ abort:
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	rxrpc_call_abort(call,ret);
+	schedule();
+	goto out_unwait;
+} /* end afs_rxvl_get_entry_by_id() */
+
+/*****************************************************************************/
+/*
+ * look up a volume location database entry by ID asynchronously
+ */
+int afs_rxvl_get_entry_by_id_async(afs_async_op_t *op,
+				   afs_volid_t volid,
+				   afs_voltype_t voltype)
+{
+	struct rxrpc_connection *conn;
+	struct rxrpc_call *call;
+	struct iovec piov[1];
+	size_t sent;
+	int ret;
+	u32 param[3];
+
+	_enter(",%x,%d,",volid,voltype);
+
+	/* get hold of the vlserver connection */
+	ret = afs_server_get_vlconn(op->server,&conn);
+	if (ret<0) {
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	/* create a call through that connection */
+	ret = rxrpc_create_call(conn,
+				afs_rxvl_get_entry_by_id_attn,
+				afs_rxvl_get_entry_by_id_error,
+				afs_rxvl_aemap,
+				&op->call);
+	rxrpc_put_connection(conn);	/* our connection ref is dropped whether or not the call was created */
+
+	if (ret<0) {
+		printk("kAFS: Unable to create call: %d\n",ret);
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	op->call->app_opcode = VLGETENTRYBYID;
+	op->call->app_user = op;	/* lets the attn/error handlers get from the call back to the op */
+
+	call = op->call;
+	rxrpc_get_call(call);	/* extra ref for the rest of this function; dropped at out: */
+
+	/* send event notifications from the call to kafsasyncd */
+	afs_kafsasyncd_begin_op(op);
+
+	/* marshall the parameters */
+	param[0] = htonl(VLGETENTRYBYID);
+	param[1] = htonl(volid);
+	param[2] = htonl(voltype);
+
+	piov[0].iov_len = sizeof(param);
+	piov[0].iov_base = param;
+
+	/* allocate result read buffer in scratch space */
+	call->app_scr_ptr = rxrpc_call_alloc_scratch(op->call,384);	/* same 384 bytes that the read below asks for */
+
+	/* send the parameters to the server */
+	ret = rxrpc_call_write_data(call,1,piov,RXRPC_LAST_PACKET,GFP_NOFS,0,&sent);
+	if (ret<0) {
+		rxrpc_call_abort(call,ret); /* handle from kafsasyncd */
+		ret = 0;	/* the failure is reported through the call, not the return value */
+		goto out;
+	}
+
+	/* ask for the reply without RXRPC_CALL_READ_BLOCK (contrast the synchronous lookups) */
+	ret = rxrpc_call_read_data(call,call->app_scr_ptr,384,0);
+	switch (ret) {
+	case 0:
+	case -EAGAIN:
+	case -ECONNABORTED:
+		ret = 0;
+		break;	/* all handled by kafsasyncd */
+
+	default:
+		rxrpc_call_abort(call,ret); /* force kafsasyncd to handle it */
+		ret = 0;
+		break;
+	}
+
+ out:
+	rxrpc_put_call(call);
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_rxvl_get_entry_by_id_async() */
+
+/*****************************************************************************/
+/*
+ * attend to the async get-VLDB-entry-by-ID op: -EAGAIN until the call completes or errors, then 0 or the error
+ */
+int afs_rxvl_get_entry_by_id_async2(afs_async_op_t *op,
+				    afsc_vldb_record_t *entry)
+{
+	unsigned *bp, tmp;
+	int loop, ret;
+
+	_enter("{op=%p cst=%u}",op,op->call->app_call_state);
+
+	memset(entry,0,sizeof(*entry));
+
+	if (op->call->app_call_state==RXRPC_CSTATE_COMPLETE) {
+		/* operation finished */
+		afs_kafsasyncd_terminate_op(op);
+
+		bp = op->call->app_scr_ptr;	/* the scratch buffer the reply was read into */
+
+		/* unmarshall the reply */
+		for (loop=0; loop<64; loop++)
+			entry->name[loop] = ntohl(*bp++);	/* one name character per 32-bit word */
+		bp += 2; /* final NUL, then type */
+
+		tmp = ntohl(*bp++); /* server count comes off the wire: cap it at the 8 slots read below */
+		entry->nservers = tmp > 8 ? 8 : tmp;
+
+		for (loop=0; loop<8; loop++)
+			entry->servers[loop].s_addr = *bp++;	/* addresses are kept in network byte order */
+
+		bp += 8; /* partition IDs */
+
+		for (loop=0; loop<8; loop++) {
+			tmp = ntohl(*bp++);
+			if (tmp & AFS_VLSF_RWVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RW;
+			if (tmp & AFS_VLSF_ROVOL  ) entry->srvtmask[loop] |= AFSC_VOL_STM_RO;
+			if (tmp & AFS_VLSF_BACKVOL) entry->srvtmask[loop] |= AFSC_VOL_STM_BAK;
+		}
+
+		entry->vid[0] = ntohl(*bp++);
+		entry->vid[1] = ntohl(*bp++);
+		entry->vid[2] = ntohl(*bp++);
+
+		bp++; /* clone ID */
+
+		tmp = ntohl(*bp++); /* flags */
+		if (tmp & AFS_VLF_RWEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RW;
+		if (tmp & AFS_VLF_ROEXISTS  ) entry->vidmask |= AFSC_VOL_STM_RO;
+		if (tmp & AFS_VLF_BACKEXISTS) entry->vidmask |= AFSC_VOL_STM_BAK;
+
+		ret = -ENOMEDIUM;	/* an entry advertising no volume type at all is rejected */
+		if (!entry->vidmask) {
+			rxrpc_call_abort(op->call,ret);
+			goto done;
+		}
+
+#if 0 /* TODO: remove */
+		entry->nservers = 3;
+		entry->servers[0].s_addr = htonl(0xac101249);
+		entry->servers[1].s_addr = htonl(0xac101243);
+		entry->servers[2].s_addr = htonl(0xac10125b /*0xac10125b*/);
+
+		entry->srvtmask[0] = AFSC_VOL_STM_RO;
+		entry->srvtmask[1] = AFSC_VOL_STM_RO;
+		entry->srvtmask[2] = AFSC_VOL_STM_RO | AFSC_VOL_STM_RW;
+#endif
+
+		/* success */
+		entry->ctime = xtime.tv_sec;
+		ret = 0;
+		goto done;
+	}
+
+	if (op->call->app_call_state==RXRPC_CSTATE_ERROR) {
+		/* operation error */
+		ret = op->call->app_errno;
+		goto done;
+	}
+
+	_leave(" = -EAGAIN");
+	return -EAGAIN;
+
+ done:
+	rxrpc_put_call(op->call);	/* the op gives up its call reference once a final result is known */
+	op->call = NULL;
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_rxvl_get_entry_by_id_async2() */
+
+/*****************************************************************************/
+/*
+ * handle attention events on an async get-entry-by-ID op
+ * - called from krxiod
+ */
+static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call)
+{
+	afs_async_op_t *op = call->app_user;	/* the op bound to this call when the lookup was started */
+
+	_enter("{op=%p cst=%u}",op,call->app_call_state);
+
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_COMPLETE:
+		afs_kafsasyncd_attend_op(op);
+		break;
+	case RXRPC_CSTATE_CLNT_RCV_REPLY:
+		if (call->app_async_read)
+			break;
+	case RXRPC_CSTATE_CLNT_GOT_REPLY: /* RCV_REPLY falls through to here when app_async_read is clear */
+		if (call->app_read_count==0)
+			break;
+		printk("kAFS: Reply bigger than expected {cst=%u asyn=%d mark=%d rdy=%u pr=%u%s}\n", /* '\n' ends the log line */
+		       call->app_call_state,
+		       call->app_async_read,
+		       call->app_mark,
+		       call->app_ready_qty,
+		       call->pkt_rcv_count,
+		       call->app_last_rcv ? " last" : "");
+
+		rxrpc_call_abort(call,-EBADMSG);
+		break;
+	default:
+		BUG();	/* any other call state here is treated as a fatal logic error */
+	}
+
+	_leave("");
+
+} /* end afs_rxvl_get_entry_by_id_attn() */
+
+/*****************************************************************************/
+/*
+ * handle error events on an async get-entry-by-ID op
+ * - called from krxiod
+ */
+static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call)
+{
+	afs_async_op_t *op = call->app_user;	/* the op bound to this call when the lookup was started */
+
+	_enter("{op=%p cst=%u}",op,call->app_call_state);
+
+	afs_kafsasyncd_attend_op(op);	/* same hand-off as the COMPLETE case of the attention handler */
+
+	_leave("");
+
+} /* end afs_rxvl_get_entry_by_id_error() */
diff --git a/fs/afs/vlclient.h b/fs/afs/vlclient.h
new file mode 100644
index 000000000000..5791e04d6382
--- /dev/null
+++ b/fs/afs/vlclient.h
@@ -0,0 +1,95 @@
+/* vlclient.h: Volume Location Service client interface
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_VLCLIENT_H
+#define _LINUX_AFS_VLCLIENT_H
+
+#include "types.h"
+
+enum AFSVL_Errors {
+	AFSVL_IDEXIST 		= 363520,	/* Volume Id entry exists in vl database */
+	AFSVL_IO 		= 363521,	/* I/O related error */
+	AFSVL_NAMEEXIST 	= 363522,	/* Volume name entry exists in vl database */
+	AFSVL_CREATEFAIL 	= 363523,	/* Internal creation failure */
+	AFSVL_NOENT 		= 363524,	/* No such entry */
+	AFSVL_EMPTY 		= 363525,	/* Vl database is empty */
+	AFSVL_ENTDELETED 	= 363526,	/* Entry is deleted (soft delete) */
+	AFSVL_BADNAME 		= 363527,	/* Volume name is illegal */
+	AFSVL_BADINDEX 		= 363528,	/* Index is out of range */
+	AFSVL_BADVOLTYPE 	= 363529,	/* Bad volume type */
+	AFSVL_BADSERVER 	= 363530,	/* Illegal server number (out of range) */
+	AFSVL_BADPARTITION 	= 363531,	/* Bad partition number */
+	AFSVL_REPSFULL 		= 363532,	/* Run out of space for Replication sites */
+	AFSVL_NOREPSERVER 	= 363533,	/* No such Replication server site exists */
+	AFSVL_DUPREPSERVER 	= 363534,	/* Replication site already exists */
+	AFSVL_RWNOTFOUND 	= 363535,	/* Parent R/W entry not found */
+	AFSVL_BADREFCOUNT 	= 363536,	/* Illegal Reference Count number */
+	AFSVL_SIZEEXCEEDED 	= 363537,	/* Vl size for attributes exceeded */
+	AFSVL_BADENTRY 		= 363538,	/* Bad incoming vl entry */
+	AFSVL_BADVOLIDBUMP 	= 363539,	/* Illegal max volid increment */
+	AFSVL_IDALREADYHASHED 	= 363540,	/* RO/BACK id already hashed */
+	AFSVL_ENTRYLOCKED 	= 363541,	/* Vl entry is already locked */
+	AFSVL_BADVOLOPER 	= 363542,	/* Bad volume operation code */
+	AFSVL_BADRELLOCKTYPE 	= 363543,	/* Bad release lock type */
+	AFSVL_RERELEASE 	= 363544,	/* Status report: last release was aborted */
+	AFSVL_BADSERVERFLAG 	= 363545,	/* Invalid replication site server flag */
+	AFSVL_PERM 		= 363546,	/* No permission access */
+	AFSVL_NOMEM 		= 363547,	/* malloc/realloc failed to alloc enough memory */
+};
+
+/* maps to "struct vldbentry" in vvl-spec.pdf */
+struct  afsvl_dbentry {
+	char		name[65];		/* name of volume (including NUL char) */
+	afs_voltype_t	type;			/* volume type */
+	unsigned	num_servers;		/* num servers that hold instances of this vol */
+	unsigned	clone_id;		/* cloning ID */
+
+	unsigned	flags;
+#define AFS_VLF_RWEXISTS	0x1000		/* R/W volume exists */
+#define AFS_VLF_ROEXISTS	0x2000		/* R/O volume exists */
+#define AFS_VLF_BACKEXISTS	0x4000		/* backup volume exists */
+
+	afs_volid_t	volume_ids[3];		/* volume IDs */
+
+	struct {
+		struct in_addr	addr;		/* server address */
+		unsigned	partition;	/* partition ID on this server */
+		unsigned	flags;		/* server specific flags */
+#define AFS_VLSF_NEWREPSITE	0x0001	/* unused */
+#define AFS_VLSF_ROVOL		0x0002	/* this server holds a R/O instance of the volume */
+#define AFS_VLSF_RWVOL		0x0004	/* this server holds a R/W instance of the volume */
+#define AFS_VLSF_BACKVOL	0x0008	/* this server holds a backup instance of the volume */
+	} servers[8];
+
+};
+
+/* probe a volume location server to see if it is still alive */
+extern int afs_rxvl_probe(afs_server_t *server, int alloc_flags);
+
+/* look up a volume location database entry by name */
+extern int afs_rxvl_get_entry_by_name(afs_server_t *server,
+				      const char *volname,
+				      afsc_vldb_record_t *entry);
+
+/* look up a volume location database entry by ID */
+extern int afs_rxvl_get_entry_by_id(afs_server_t *server,
+				    afs_volid_t	volid,
+				    afs_voltype_t voltype,
+				    afsc_vldb_record_t *entry);
+
+extern int afs_rxvl_get_entry_by_id_async(afs_async_op_t *op,
+					  afs_volid_t volid,
+					  afs_voltype_t voltype);
+
+extern int afs_rxvl_get_entry_by_id_async2(afs_async_op_t *op,
+					   afsc_vldb_record_t *entry);
+
+#endif /* _LINUX_AFS_VLCLIENT_H */
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
new file mode 100644
index 000000000000..8d9f4d7e8f29
--- /dev/null
+++ b/fs/afs/vlocation.c
@@ -0,0 +1,824 @@
+/* vlocation.c: volume location management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "cell.h"
+#include "cmservice.h"
+#include "fsclient.h"
+#include "vlclient.h"
+#include "kafstimod.h"
+#include <rxrpc/connection.h>
+#include "internal.h"
+
+#define AFS_VLDB_TIMEOUT (HZ*1000)	/* jiffies between routine VL record updates (1000s) */
+
+static void afs_vlocation_update_timer(afs_timer_t *timer);
+static void afs_vlocation_update_attend(afs_async_op_t *op);
+static void afs_vlocation_update_discard(afs_async_op_t *op);
+
+/* expiry handler for a VL record's graveyard timer - hands the record on to be reaped */
+static void __afs_vlocation_timeout(afs_timer_t *timer)
+{
+	afs_vlocation_t *vl;
+	vl = list_entry(timer,afs_vlocation_t,timeout);
+	_debug("VL TIMEOUT [%s{u=%d}]",vl->vldb.name,atomic_read(&vl->usage));
+	afs_vlocation_do_timeout(vl);
+}
+
+static const struct afs_timer_ops afs_vlocation_timer_ops = {
+	.timed_out	= __afs_vlocation_timeout,	/* graveyard timer expired */
+};
+
+static const struct afs_timer_ops afs_vlocation_update_timer_ops = {
+	.timed_out	= afs_vlocation_update_timer,	/* periodic update or busy-retry timer expired */
+};
+
+static const struct afs_async_op_ops afs_vlocation_update_op_ops = {
+	.attend		= afs_vlocation_update_attend,	/* an event happened on the update op */
+	.discard	= afs_vlocation_update_discard,	/* the update op is being thrown away */
+};
+
+static LIST_HEAD(afs_vlocation_update_pendq);	/* queue of VLs awaiting update */
+static afs_vlocation_t *afs_vlocation_update;	/* VL currently being updated (NULL if none) */
+static spinlock_t afs_vlocation_update_lock = SPIN_LOCK_UNLOCKED; /* lock guarding update queue */
+
+/*****************************************************************************/
+/*
+ * look a volume up by name: iterate through the VL servers in a cell (starting with the one at
+ * cell->vl_curr_svix) until one of them admits knowing about the volume in question
+ * - caller must have cell->vl_sem write-locked
+ */
+static int afs_vlocation_access_vl_by_name(afs_vlocation_t *vlocation,
+					   const char *name,
+					   afsc_vldb_record_t *vldb)
+{
+	afs_server_t *server = NULL;
+	afs_cell_t *cell = vlocation->cell;
+	int count, ret;
+
+	_enter("%s,%s,",cell->name,name);
+
+	ret = -ENOMEDIUM;	/* what gets returned if the cell has no VL addresses to try */
+	for (count=cell->vl_naddrs; count>0; count--) {	/* one attempt per VL address at most */
+		_debug("CellServ[%hu]: %08x",
+		       cell->vl_curr_svix,cell->vl_addrs[cell->vl_curr_svix].s_addr);
+
+		/* try and create a server */
+		ret = afs_server_lookup(cell,&cell->vl_addrs[cell->vl_curr_svix],&server);
+		switch (ret) {
+		case 0:
+			break;
+		case -ENOMEM:
+		case -ENONET:
+			goto out;	/* give up without trying any other address */
+		default:
+			goto rotate;	/* no server record obtained - try the next address */
+		}
+
+		/* attempt to access the VL server */
+		ret = afs_rxvl_get_entry_by_name(server,name,vldb);
+		switch (ret) {
+		case 0:
+			afs_put_server(server);
+			goto out;
+		case -ENOMEM:
+		case -ENONET:
+		case -ENETUNREACH:
+		case -EHOSTUNREACH:
+		case -ECONNREFUSED:
+			down_write(&server->sem);	/* discard the server's VL connection if it has one */
+			if (server->vlserver) {
+				rxrpc_put_connection(server->vlserver);
+				server->vlserver = NULL;
+			}
+			up_write(&server->sem);
+			afs_put_server(server);
+			if (ret==-ENOMEM || ret==-ENONET)
+				goto out;	/* these two end the whole lookup */
+			goto rotate;	/* the others just move us on to the next server */
+		case -ENOMEDIUM:	/* returned as is, without asking the remaining servers */
+			afs_put_server(server);
+			goto out;
+		default:
+			afs_put_server(server);
+			ret = -ENOMEDIUM;	/* any other error is turned into -ENOMEDIUM */
+			goto rotate;
+		}
+
+		/* rotate the server records upon lookup failure */
+	rotate:
+		cell->vl_curr_svix++;
+		cell->vl_curr_svix %= cell->vl_naddrs;
+	}
+
+ out:
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_vlocation_access_vl_by_name() */
+
+/*****************************************************************************/
+/*
+ * look a volume up by ID and type: iterate through the VL servers in a cell (starting with the
+ * one at cell->vl_curr_svix) until one of them admits knowing about the volume in question
+ * - caller must have cell->vl_sem write-locked
+ */
+static int afs_vlocation_access_vl_by_id(afs_vlocation_t *vlocation,
+					 afs_volid_t volid,
+					 afs_voltype_t voltype,
+					 afsc_vldb_record_t *vldb)
+{
+	afs_server_t *server = NULL;
+	afs_cell_t *cell = vlocation->cell;
+	int count, ret;
+
+	_enter("%s,%x,%d,",cell->name,volid,voltype);
+
+	ret = -ENOMEDIUM;	/* what gets returned if the cell has no VL addresses to try */
+	for (count=cell->vl_naddrs; count>0; count--) {	/* one attempt per VL address at most */
+		_debug("CellServ[%hu]: %08x",
+		       cell->vl_curr_svix,cell->vl_addrs[cell->vl_curr_svix].s_addr);
+
+		/* try and create a server */
+		ret = afs_server_lookup(cell,&cell->vl_addrs[cell->vl_curr_svix],&server);
+		switch (ret) {
+		case 0:
+			break;
+		case -ENOMEM:
+		case -ENONET:
+			goto out;	/* give up without trying any other address */
+		default:
+			goto rotate;	/* no server record obtained - try the next address */
+		}
+
+		/* attempt to access the VL server */
+		ret = afs_rxvl_get_entry_by_id(server,volid,voltype,vldb);
+		switch (ret) {
+		case 0:
+			afs_put_server(server);
+			goto out;
+		case -ENOMEM:
+		case -ENONET:
+		case -ENETUNREACH:
+		case -EHOSTUNREACH:
+		case -ECONNREFUSED:
+			down_write(&server->sem);	/* discard the server's VL connection if it has one */
+			if (server->vlserver) {
+				rxrpc_put_connection(server->vlserver);
+				server->vlserver = NULL;
+			}
+			up_write(&server->sem);
+			afs_put_server(server);
+			if (ret==-ENOMEM || ret==-ENONET)
+				goto out;	/* these two end the whole lookup */
+			goto rotate;	/* the others just move us on to the next server */
+		case -ENOMEDIUM:	/* returned as is, without asking the remaining servers */
+			afs_put_server(server);
+			goto out;
+		default:
+			afs_put_server(server);
+			ret = -ENOMEDIUM;	/* any other error is turned into -ENOMEDIUM */
+			goto rotate;
+		}
+
+		/* rotate the server records upon lookup failure */
+	rotate:
+		cell->vl_curr_svix++;
+		cell->vl_curr_svix %= cell->vl_naddrs;
+	}
+
+ out:
+	_leave(" = %d",ret);
+	return ret;
+
+} /* end afs_vlocation_access_vl_by_id() */
+
+/*****************************************************************************/
+/*
+ * look a volume location up by name, returning 0 and a ref'd record in *_vlocation on success
+ * - caller must have cell->vl_sem write-locked
+ * - reuses the record on the cell's active or graveyard list if there is one, else creates one
+ * - the record is (re)read from the cell's VL servers and its update timer is started
+ * - names too long to be stored NUL-terminated in vldb.name are refused with -ENAMETOOLONG
+ * - lookup in and update of the local cache are currently compiled out (#if 0)
+ */
+int afs_vlocation_lookup(afs_cell_t *cell, const char *name, afs_vlocation_t **_vlocation)
+{
+	afsc_vldb_record_t vldb;
+	struct list_head *_p;
+	afs_vlocation_t *vlocation;
+	afs_voltype_t voltype;
+	afs_volid_t vid;
+	int active = 0, ret;
+
+	_enter(",%s,%s,",cell->name,name);
+
+	if (strlen(name)>=sizeof(vlocation->vldb.name)) {	/* must leave room for the NUL */
+		_leave(" = -ENAMETOOLONG");
+		return -ENAMETOOLONG;
+	}
+
+	/* search the cell's active list first */
+	list_for_each(_p,&cell->vl_list) {
+		vlocation = list_entry(_p,afs_vlocation_t,link);
+		if (strncmp(vlocation->vldb.name,name,sizeof(vlocation->vldb.name))==0)
+			goto found_in_memory;
+	}
+
+	/* search the cell's graveyard list second */
+	spin_lock(&cell->vl_gylock);
+	list_for_each(_p,&cell->vl_graveyard) {
+		vlocation = list_entry(_p,afs_vlocation_t,link);
+		if (strncmp(vlocation->vldb.name,name,sizeof(vlocation->vldb.name))==0)
+			goto found_in_graveyard;
+	}
+	spin_unlock(&cell->vl_gylock);
+
+	/* not in the cell's in-memory lists - create a new record */
+	vlocation = kmalloc(sizeof(afs_vlocation_t),GFP_KERNEL);
+	if (!vlocation)
+		return -ENOMEM;
+
+	memset(vlocation,0,sizeof(afs_vlocation_t));
+	atomic_set(&vlocation->usage,1);
+	INIT_LIST_HEAD(&vlocation->link);
+	rwlock_init(&vlocation->lock);
+	strncpy(vlocation->vldb.name,name,sizeof(vlocation->vldb.name)); /* length checked above */
+
+	afs_timer_init(&vlocation->timeout,&afs_vlocation_timer_ops);
+	afs_timer_init(&vlocation->upd_timer,&afs_vlocation_update_timer_ops);
+	afs_async_op_init(&vlocation->upd_op,&afs_vlocation_update_op_ops);
+
+	INIT_LIST_HEAD(&vlocation->caches);
+
+	afs_get_cell(cell);
+	vlocation->cell = cell;
+
+	list_add_tail(&vlocation->link,&cell->vl_list);
+
+#if 0
+	/* search local cache if wasn't in memory */
+	ret = afsc_lookup_vlocation(vlocation);
+	switch (ret) {
+	default:	goto error;		/* disk error */
+	case 0:		goto found_in_cache;	/* pulled from local cache into memory */
+	case -ENOENT:	break;			/* not in local cache */
+	}
+#endif
+
+	/* try to look up an unknown volume in the cell VL databases by name */
+	ret = afs_vlocation_access_vl_by_name(vlocation,name,&vldb);
+	if (ret<0) {
+		printk("kAFS: failed to locate '%s' in cell '%s'\n",name,cell->name);
+		goto error;
+	}
+
+	goto found_on_vlserver;
+
+ found_in_graveyard:
+	/* found in the graveyard - resurrect */
+	_debug("found in graveyard");
+	atomic_inc(&vlocation->usage);
+	list_del(&vlocation->link);
+	list_add_tail(&vlocation->link,&cell->vl_list);
+	spin_unlock(&cell->vl_gylock);
+
+	afs_kafstimod_del_timer(&vlocation->timeout);
+	goto active;
+
+ found_in_memory:
+	/* found in memory - check to see if it's active */
+	_debug("found in memory");
+	atomic_inc(&vlocation->usage);
+
+ active:
+	active = 1;	/* the error path must drop a ref rather than free the record */
+
+/* found_in_cache: */
+	/* try to look up a cached volume in the cell VL databases by ID */
+	_debug("found in cache");
+
+	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
+	       vlocation->vldb.name,
+	       vlocation->vldb.vidmask,
+	       ntohl(vlocation->vldb.servers[0].s_addr),vlocation->vldb.srvtmask[0],
+	       ntohl(vlocation->vldb.servers[1].s_addr),vlocation->vldb.srvtmask[1],
+	       ntohl(vlocation->vldb.servers[2].s_addr),vlocation->vldb.srvtmask[2]
+	       );
+
+	_debug("Vids: %08x %08x %08x",
+	       vlocation->vldb.vid[0],vlocation->vldb.vid[1],vlocation->vldb.vid[2]);
+
+	if (vlocation->vldb.vidmask & AFSC_VOL_STM_RW) {
+		vid = vlocation->vldb.vid[0];
+		voltype = AFSVL_RWVOL;
+	}
+	else if (vlocation->vldb.vidmask & AFSC_VOL_STM_RO) {
+		vid = vlocation->vldb.vid[1];
+		voltype = AFSVL_ROVOL;
+	}
+	else if (vlocation->vldb.vidmask & AFSC_VOL_STM_BAK) {
+		vid = vlocation->vldb.vid[2];
+		voltype = AFSVL_BACKVOL;
+	}
+	else {
+		BUG();
+		vid = 0;
+		voltype = 0;
+	}
+
+	ret = afs_vlocation_access_vl_by_id(vlocation,vid,voltype,&vldb);
+	switch (ret) {
+		/* net error */
+	default:
+		printk("kAFS: failed to look volume '%s' (%x) up in '%s': %d\n",
+		       name,vid,cell->name,ret);
+		goto error;
+
+		/* a VL server supplied a fresh copy of the record */
+	case 0:	
+		goto found_on_vlserver;
+
+		/* uh oh... looks like the volume got deleted */
+	case -ENOMEDIUM:
+		printk("kAFS: volume '%s' (%x) does not exist in '%s'\n",name,vid,cell->name);
+
+		/* TODO: make existing record unavailable */
+		goto error;
+	}
+
+ found_on_vlserver:
+	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
+	       name,
+	       vldb.vidmask,
+	       ntohl(vldb.servers[0].s_addr),vldb.srvtmask[0],
+	       ntohl(vldb.servers[1].s_addr),vldb.srvtmask[1],
+	       ntohl(vldb.servers[2].s_addr),vldb.srvtmask[2]
+	       );
+
+	_debug("Vids: %08x %08x %08x",vldb.vid[0],vldb.vid[1],vldb.vid[2]);
+
+	if (strncmp(vldb.name,name,sizeof(vlocation->vldb.name))!=0)
+		printk("kAFS: name of volume '%s' changed to '%s' on server\n",name,vldb.name);
+
+	memcpy(&vlocation->vldb,&vldb,sizeof(vlocation->vldb));
+
+#if 0
+	/* add volume entry to local cache */
+	ret = afsc_update_vlocation(vlocation);
+	if (ret<0)
+		goto error;
+#endif
+
+	afs_kafstimod_add_timer(&vlocation->upd_timer,10*HZ);	/* first update due in 10 secs */
+
+	*_vlocation = vlocation;
+	_leave(" = 0 (%p)",vlocation);
+	return 0;
+
+ error:
+	if (vlocation) {
+		if (active) {
+			/* a pre-existing record - just give back the ref taken on it above */
+			__afs_put_vlocation(vlocation);
+		}
+		else {
+			/* a record created by this call - undo the creation completely */
+			list_del(&vlocation->link);
+			afs_put_cell(vlocation->cell);
+#if 0
+			afs_put_cache(vlocation->cache);
+#endif
+			kfree(vlocation);
+		}
+	}
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_vlocation_lookup() */
+
+/*****************************************************************************/
+/*
+ * drop a ref on a volume location record; the last ref moves it to the cell's graveyard
+ * - caller must have cell->vl_sem write-locked (as afs_put_vlocation() does)
+ */
+void __afs_put_vlocation(afs_vlocation_t *vlocation)
+{
+	afs_cell_t *cell = vlocation->cell;
+	int last;
+
+	_enter("%s",vlocation->vldb.name);
+
+	/* the caller must actually be holding a ref */
+	if (atomic_read(&vlocation->usage)<=0)
+		BUG();
+
+	spin_lock(&cell->vl_gylock);
+	last = atomic_dec_and_test(&vlocation->usage);
+	if (last) {
+		/* no users left - shift the record from its current list to the graveyard */
+		list_del(&vlocation->link);
+		list_add_tail(&vlocation->link,&cell->vl_graveyard);
+
+		/* take it off the pending update queue (refcounted if actually being updated) */
+		list_del_init(&vlocation->upd_op.link);
+
+		/* stop the update timer and arrange for the record to time out in 10 secs */
+		afs_kafstimod_del_timer(&vlocation->upd_timer);
+		afs_kafstimod_add_timer(&vlocation->timeout,10*HZ);
+		spin_unlock(&cell->vl_gylock);
+		_leave(" [killed]");
+		return;
+	}
+
+	spin_unlock(&cell->vl_gylock);
+	_leave("");
+} /* end __afs_put_vlocation() */
+
+/*****************************************************************************/
+/*
+ * drop a ref on a volume location record, taking the cell's vl_sem on the caller's behalf
+ */
+void afs_put_vlocation(afs_vlocation_t *vlocation)
+{
+	afs_cell_t *owner;
+	owner = vlocation->cell;	/* noted before the ref (possibly the last) is dropped */
+	down_write(&owner->vl_sem);
+	__afs_put_vlocation(vlocation);
+	up_write(&owner->vl_sem);
+} /* end afs_put_vlocation() */
+
+/*****************************************************************************/
+/*
+ * graveyard timeout for a vlocation record
+ * - destroys the record if its usage count is still zero, otherwise leaves it alone
+ * - reached from the record's timeout timer by way of __afs_vlocation_timeout()
+ */
+void afs_vlocation_do_timeout(afs_vlocation_t *vlocation)
+{
+	afs_cell_t *cell = vlocation->cell;
+	int still_dead;
+
+	_enter("%s",vlocation->vldb.name);
+
+	if (atomic_read(&vlocation->usage)<0)
+		BUG();
+
+	/* unlink from the graveyard only if nobody has resurrected the record */
+	spin_lock(&cell->vl_gylock);
+	still_dead = (atomic_read(&vlocation->usage)==0);
+	if (still_dead)
+		list_del_init(&vlocation->link);
+	spin_unlock(&cell->vl_gylock);
+
+	if (!still_dead) {
+		_leave("");
+		return; /* resurrected */
+	}
+
+	/* it's off every list now, so release what it holds and free it */
+	afs_put_cell(cell);
+#if 0
+	afs_put_cache(vlocation->cache);
+#endif
+
+	kfree(vlocation);
+
+	_leave(" [destroyed]");
+} /* end afs_vlocation_do_timeout() */
+
+/*****************************************************************************/
+/*
+ * send an update operation to the currently selected server
+ * - the server is the cell's VL address at index vlocation->upd_curr_svix
+ * - the record is queried by ID, using the first of the R/W, R/O and backup volume IDs that
+ *   vldb.vidmask marks as present
+ * - returns the result of starting the asynchronous operation, or the error from the server
+ *   lookup if that fails
+ */
+static int afs_vlocation_update_begin(afs_vlocation_t *vlocation)
+{
+	afs_cell_t *cell = vlocation->cell;
+	afs_voltype_t voltype;
+	afs_volid_t vid;
+	int ret;
+
+	_enter("%s{ufs=%u ucs=%u}",
+	       vlocation->vldb.name,vlocation->upd_first_svix,vlocation->upd_curr_svix);
+
+	/* work out which volume ID to ask about - R/W for preference, then R/O, then backup */
+	if (vlocation->vldb.vidmask & AFSC_VOL_STM_RW) {
+		voltype = AFSVL_RWVOL;
+		vid = vlocation->vldb.vid[0];
+	}
+	else if (vlocation->vldb.vidmask & AFSC_VOL_STM_RO) {
+		voltype = AFSVL_ROVOL;
+		vid = vlocation->vldb.vid[1];
+	}
+	else if (vlocation->vldb.vidmask & AFSC_VOL_STM_BAK) {
+		voltype = AFSVL_BACKVOL;
+		vid = vlocation->vldb.vid[2];
+	}
+	else {
+		/* a record with none of the three ID bits set is not expected here */
+		BUG();
+		voltype = 0;
+		vid = 0;
+	}
+
+	/* get a record for the server we're to talk to; the pointer to it is parked in
+	 * upd_op.server for the operation to use */
+	ret = afs_server_lookup(cell,
+				&cell->vl_addrs[vlocation->upd_curr_svix],
+				&vlocation->upd_op.server);
+	if (ret!=0) {
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	/* start the asynchronous query; whatever that returns, good or bad, is passed straight
+	 * back to the caller */
+	ret = afs_rxvl_get_entry_by_id_async(&vlocation->upd_op,vid,voltype);
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_vlocation_update_begin() */
+
+/*****************************************************************************/
+/*
+ * abandon updating a VL record, leaving it in the given update state (ret<0 gets logged)
+ * - releases the op's server record but does not restart the update timer
+ */
+static void afs_vlocation_update_abandon(afs_vlocation_t *vlocation,
+					 afs_vlocation_upd_t state,
+					 int ret)
+{
+	afs_server_t *server = vlocation->upd_op.server;
+
+	_enter("%s,%u",vlocation->vldb.name,state);
+
+	if (ret<0)
+		printk("kAFS: Abandoning VL update '%s': %d\n",vlocation->vldb.name,ret);
+
+	if (server) {
+		afs_put_server(server);
+		vlocation->upd_op.server = NULL;
+	}
+
+	/* free up the "currently being updated" slot and note the record's new state */
+	spin_lock(&afs_vlocation_update_lock);
+	vlocation->upd_state = state;
+	afs_vlocation_update = NULL;
+	/* TODO: start updating next VL record on pending list */
+	spin_unlock(&afs_vlocation_update_lock);
+
+	_leave("");
+} /* end afs_vlocation_update_abandon() */
+
+/*****************************************************************************/
+/*
+ * handle periodic update timeouts and busy retry timeouts
+ * - called from kafstimod
+ * - a record that was sleeping on EBUSY retries the server it was talking to
+ * - otherwise a fresh pass over the cell's VL servers is started
+ */
+static void afs_vlocation_update_timer(afs_timer_t *timer)
+{
+	afs_vlocation_t *vlocation = list_entry(timer,afs_vlocation_t,upd_timer);
+	int busy_retry, ret;
+
+	_enter("%s",vlocation->vldb.name);
+
+	/* only update if not in the graveyard (defend against putting too) */
+	spin_lock(&vlocation->cell->vl_gylock);
+
+	if (!atomic_read(&vlocation->usage))
+		goto out_unlock1;
+
+	spin_lock(&afs_vlocation_update_lock);
+
+	/* a record woken from an EBUSY sleep resumes the update it was in the middle of */
+	busy_retry = (vlocation->upd_state==AFS_VLUPD_BUSYSLEEP);
+
+	/* if another update is in progress, wait on the pending queue - a busy retry jumps to the
+	 * front of it and keeps its BUSYSLEEP state */
+	if (afs_vlocation_update) {
+		if (busy_retry) {
+			list_add(&vlocation->upd_op.link,&afs_vlocation_update_pendq);
+		}
+		else {
+			vlocation->upd_state = AFS_VLUPD_PENDING;
+			list_add_tail(&vlocation->upd_op.link,&afs_vlocation_update_pendq);
+		}
+		goto out_unlock2;
+	}
+
+	/* hold a ref on it while actually updating */
+	afs_get_vlocation(vlocation);
+	afs_vlocation_update = vlocation;
+	vlocation->upd_state = AFS_VLUPD_INPROGRESS;
+
+	spin_unlock(&afs_vlocation_update_lock);
+	spin_unlock(&vlocation->cell->vl_gylock);
+
+	/* okay... we can start the update - a busy retry goes back to the same server with its
+	 * rejection and busy counts intact so that the retry limit still applies */
+	_debug("BEGIN VL UPDATE [%s]",vlocation->vldb.name);
+	if (!busy_retry) {
+		vlocation->upd_first_svix = vlocation->cell->vl_curr_svix;
+		vlocation->upd_curr_svix = vlocation->upd_first_svix;
+		vlocation->upd_rej_cnt = 0;
+		vlocation->upd_busy_cnt = 0;
+	}
+
+	ret = afs_vlocation_update_begin(vlocation);
+	if (ret<0) {
+		afs_vlocation_update_abandon(vlocation,AFS_VLUPD_SLEEP,ret);
+		afs_kafstimod_add_timer(&vlocation->upd_timer,AFS_VLDB_TIMEOUT);
+		afs_put_vlocation(vlocation);
+	}
+
+	_leave("");
+	return;
+
+ out_unlock2:
+	spin_unlock(&afs_vlocation_update_lock);
+ out_unlock1:
+	spin_unlock(&vlocation->cell->vl_gylock);
+	_leave("");
+	return;
+
+} /* end afs_vlocation_update_timer() */
+
+/*****************************************************************************/
+/*
+ * attend to an update operation upon which an event happened
+ * - called in kafsasyncd context
+ * - collects the result of the async lookup: on success the record is replaced; otherwise the
+ *   next VL server is tried, or the update is abandoned and the update timer restarted
+ */
+static void afs_vlocation_update_attend(afs_async_op_t *op)
+{
+	afsc_vldb_record_t vldb;
+	afs_vlocation_t *vlocation = list_entry(op,afs_vlocation_t,upd_op);
+	unsigned tmp;
+	int ret;
+
+	_enter("%s",vlocation->vldb.name);
+
+	ret = afs_rxvl_get_entry_by_id_async2(op,&vldb);
+	switch (ret) {
+	case -EAGAIN:
+		_leave(" [unfinished]");
+		return;
+
+	case 0:
+		_debug("END VL UPDATE: %d\n",ret);
+		vlocation->valid = 1;
+		_debug("Done VL Lookup: %02x { %08x(%x) %08x(%x) %08x(%x) }",
+		       vldb.vidmask,
+		       ntohl(vldb.servers[0].s_addr),vldb.srvtmask[0],
+		       ntohl(vldb.servers[1].s_addr),vldb.srvtmask[1],
+		       ntohl(vldb.servers[2].s_addr),vldb.srvtmask[2]
+		       );
+		_debug("Vids: %08x %08x %08x",vldb.vid[0],vldb.vid[1],vldb.vid[2]);
+
+		afs_vlocation_update_abandon(vlocation,AFS_VLUPD_SLEEP,0);
+		/* actually update the cache */
+		down_write(&vlocation->cell->vl_sem);
+		if (strncmp(vldb.name,vlocation->vldb.name,sizeof(vlocation->vldb.name))!=0)
+			printk("kAFS: name of volume '%s' changed to '%s' on server\n",
+			       vlocation->vldb.name,vldb.name);
+
+		memcpy(&vlocation->vldb,&vldb,sizeof(vlocation->vldb));
+
+#if 0
+		/* add volume entry to local cache */
+		ret = afsc_update_vlocation(vlocation);
+#endif
+
+		up_write(&vlocation->cell->vl_sem);
+
+		if (ret<0)
+			printk("kAFS: failed to update local cache: %d\n",ret);
+
+		afs_kafstimod_add_timer(&vlocation->upd_timer,AFS_VLDB_TIMEOUT);
+		afs_put_vlocation(vlocation);
+		_leave(" [found]");
+		return;
+
+	case -ENOMEDIUM:
+		vlocation->upd_rej_cnt++;
+		goto try_next;
+
+		/* the server is locked - retry in a very short while */
+	case -EBUSY:
+		vlocation->upd_busy_cnt++;
+		if (vlocation->upd_busy_cnt>3)
+			goto try_next; /* too many retries */
+
+		afs_vlocation_update_abandon(vlocation,AFS_VLUPD_BUSYSLEEP,0);
+		afs_kafstimod_add_timer(&vlocation->upd_timer,HZ/2);
+		afs_put_vlocation(vlocation);
+		_leave(" [busy]");
+		return;
+
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -ECONNREFUSED:
+	case -EREMOTEIO:
+		/* record bad vlserver info in the cell too by moving the cell's preferred VL server
+		 * on from the one that just failed (nothing to do if the cell has no addresses)
+		 * - TODO: use down_write_trylock() if available
+		 */
+		tmp = vlocation->cell->vl_naddrs;
+		if (tmp>0 && vlocation->upd_curr_svix == vlocation->cell->vl_curr_svix)
+			vlocation->cell->vl_curr_svix = (vlocation->cell->vl_curr_svix + 1) % tmp;
+		/* fall through */
+	case -EBADRQC:
+	case -EINVAL:
+	case -EACCES:
+	case -EBADMSG:
+		goto try_next;
+
+	default:
+		goto abandon;
+	}
+
+	/* try contacting the next server */
+ try_next:
+	vlocation->upd_busy_cnt = 0;
+	if (vlocation->upd_op.server) {
+		/* discard the server record */
+		afs_put_server(vlocation->upd_op.server);
+		vlocation->upd_op.server = NULL;
+	}
+
+	tmp = vlocation->cell->vl_naddrs;
+	if (tmp==0)
+		goto abandon;
+
+	vlocation->upd_curr_svix++;
+	if (vlocation->upd_curr_svix >= tmp) vlocation->upd_curr_svix = 0;
+	if (vlocation->upd_first_svix >= tmp) vlocation->upd_first_svix = tmp - 1;
+
+	/* move to the next server - if the op can't even be started then give up for now rather
+	 * than being left marked as the update in progress with nothing outstanding */
+	if (vlocation->upd_curr_svix!=vlocation->upd_first_svix) {
+		ret = afs_vlocation_update_begin(vlocation);
+		if (ret<0)
+			goto abandon;
+		_leave(" [next]");
+		return;
+	}
+
+	/* run out of servers to try - was the volume rejected? */
+	if (vlocation->upd_rej_cnt>0) {
+		printk("kAFS: Active volume no longer valid '%s'\n",vlocation->vldb.name);
+		vlocation->valid = 0;
+		afs_vlocation_update_abandon(vlocation,AFS_VLUPD_SLEEP,0);
+		afs_kafstimod_add_timer(&vlocation->upd_timer,AFS_VLDB_TIMEOUT);
+		afs_put_vlocation(vlocation);
+		_leave(" [invalidated]");
+		return;
+	}
+
+	/* abandon the update */
+ abandon:
+	afs_vlocation_update_abandon(vlocation,AFS_VLUPD_SLEEP,ret);
+	afs_kafstimod_add_timer(&vlocation->upd_timer,HZ*10);
+	afs_put_vlocation(vlocation);
+	_leave(" [abandoned]");
+} /* end afs_vlocation_update_attend() */
+
+/*****************************************************************************/
+/*
+ * clean up after an update operation that is being thrown away
+ * - happens in kafsasyncd context when it's dying due to rmmod
+ * - the call itself has already been aborted and put()'d by then
+ */
+static void afs_vlocation_update_discard(afs_async_op_t *op)
+{
+	afs_vlocation_t *owner;
+
+	owner = list_entry(op,afs_vlocation_t,upd_op);
+	_enter("%s",owner->vldb.name);
+
+	/* release the server record, then the ref that was held for the update */
+	afs_put_server(op->server);
+	op->server = NULL;
+	afs_put_vlocation(owner);
+	_leave("");
+} /* end afs_vlocation_update_discard() */
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
new file mode 100644
index 000000000000..ab2d7b241fc3
--- /dev/null
+++ b/fs/afs/vnode.c
@@ -0,0 +1,316 @@
+/* vnode.c: AFS vnode management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "cell.h"
+#include "cmservice.h"
+#include "fsclient.h"
+#include "vlclient.h"
+#include "vnode.h"
+#include "internal.h"
+
+static void afs_vnode_cb_timed_out(struct afs_timer *timer);
+
+struct afs_timer_ops afs_vnode_cb_timed_out_ops = {
+	.timed_out	= afs_vnode_cb_timed_out,	/* a vnode's callback timeout expired */
+};
+
+/*****************************************************************************/
+/*
+ * handle the expiry of a callback promise on a vnode
+ * - the vnode is marked changed and detached from the server that made the promise
+ * TODO: retain a ref to vnode struct for an outstanding callback timeout
+ */
+static void afs_vnode_cb_timed_out(struct afs_timer *timer)
+{
+	afs_vnode_t *vnode = list_entry(timer,afs_vnode_t,cb_timeout);
+	afs_server_t *promiser;
+
+	_enter("%p",vnode);
+	spin_lock(&vnode->lock);
+	/* take over the vnode's pointer to the promising server; nothing to do if there isn't one */
+	promiser = xchg(&vnode->cb_server,NULL);
+	if (!promiser) {
+		spin_unlock(&vnode->lock);
+		_leave("");
+		return;
+	}
+
+	vnode->flags |= AFS_VNODE_CHANGED;
+
+	/* unhook from the master callback hash ... */
+	spin_lock(&afs_cb_hash_lock);
+	list_del_init(&vnode->cb_hash_link);
+	spin_unlock(&afs_cb_hash_lock);
+
+	/* ... and from the server's list of promises */
+	spin_lock(&promiser->cb_lock);
+	list_del_init(&vnode->cb_link);
+	spin_unlock(&promiser->cb_lock);
+
+	spin_unlock(&vnode->lock);
+	afs_put_server(promiser);
+	_leave("");
+} /* end afs_vnode_cb_timed_out() */
+
+/*****************************************************************************/
+/*
+ * finish off updating the recorded status of a file
+ * - on success (ret==0): starts callback expiry timer and adds to server's callback list
+ * - on -ENOENT: marks the vnode deleted and withdraws it from all callback tracking
+ */
+void afs_vnode_finalise_status_update(afs_vnode_t *vnode, afs_server_t *server, int ret)
+{
+	afs_server_t *oldserver = NULL;
+
+	_enter("%p,%p,%d",vnode,server,ret);
+	spin_lock(&vnode->lock);
+	vnode->flags &= ~AFS_VNODE_CHANGED;
+	if (ret==0) {
+		/* adjust the callback timeout appropriately */
+		afs_kafstimod_add_timer(&vnode->cb_timeout,vnode->cb_expiry*HZ);
+		spin_lock(&afs_cb_hash_lock);
+		list_del(&vnode->cb_hash_link);
+		list_add_tail(&vnode->cb_hash_link,&afs_cb_hash(server,&vnode->fid));
+		spin_unlock(&afs_cb_hash_lock);
+
+		/* swap ref to old callback server with that for new callback server */
+		oldserver = xchg(&vnode->cb_server,server);
+		if (oldserver!=server) {
+			if (oldserver) {
+				spin_lock(&oldserver->cb_lock);
+				list_del_init(&vnode->cb_link);
+				spin_unlock(&oldserver->cb_lock);
+			}
+			afs_get_server(server);
+			spin_lock(&server->cb_lock);
+			list_add_tail(&vnode->cb_link,&server->cb_promises);
+			spin_unlock(&server->cb_lock);
+		}
+		else {
+			/* same server */
+			oldserver = NULL;
+		}
+	}
+	else if (ret==-ENOENT) {
+		/* the file was deleted - clear the callback timeout */
+		oldserver = xchg(&vnode->cb_server,NULL);
+		afs_kafstimod_del_timer(&vnode->cb_timeout);
+
+		/* the old server's ref is dropped below, so unhook from its list and the hash too */
+		if (oldserver) {
+			spin_lock(&afs_cb_hash_lock);
+			list_del_init(&vnode->cb_hash_link);
+			spin_unlock(&afs_cb_hash_lock);
+			spin_lock(&oldserver->cb_lock);
+			list_del_init(&vnode->cb_link);
+			spin_unlock(&oldserver->cb_lock);
+		}
+		_debug("got NOENT from server - marking file deleted");
+		vnode->flags |= AFS_VNODE_DELETED;
+	}
+
+	vnode->update_cnt--;	/* this op is no longer outstanding, whatever its result */
+	spin_unlock(&vnode->lock);
+	wake_up_all(&vnode->update_waitq);
+	if (oldserver)
+		afs_put_server(oldserver);
+
+	_leave("");
+} /* end afs_vnode_finalise_status_update() */
+
+/*****************************************************************************/
+/*
+ * fetch file status from the volume
+ * - don't issue a fetch if:
+ *   - the changed bit is not set and there's a valid callback
+ *   - there are any outstanding ops that will fetch the status
+ * - TODO implement local caching
+ */
+int afs_vnode_fetch_status(afs_vnode_t *vnode)
+{
+	afs_server_t *server;
+	int ret;
+
+	DECLARE_WAITQUEUE(myself,current);
+
+	_enter("%s,{%u,%u,%u}",vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,vnode->fid.vnode,vnode->fid.unique);
+
+	if (!(vnode->flags & AFS_VNODE_CHANGED) && vnode->cb_server) {
+		_leave(" [unchanged]");
+		return 0;
+	}
+
+	if (vnode->flags & AFS_VNODE_DELETED) {
+		_leave(" [deleted]");
+		return -ENOENT;
+	}
+
+	spin_lock(&vnode->lock);
+	if (!(vnode->flags & AFS_VNODE_CHANGED)) {
+		spin_unlock(&vnode->lock);
+		_leave(" [unchanged]");
+		return 0;
+	}
+
+	if (vnode->update_cnt>0) {
+		/* someone else started a fetch */
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&vnode->update_waitq,&myself);
+
+		/* wait for the status to be updated */
+		for (;;) {
+			if (!(vnode->flags & AFS_VNODE_CHANGED))	break;
+			if (vnode->flags & AFS_VNODE_DELETED)		break;
+
+			/* it got updated and invalidated all before we saw it */
+			if (vnode->update_cnt==0) {
+				remove_wait_queue(&vnode->update_waitq,&myself);
+				set_current_state(TASK_RUNNING);
+				goto get_anyway;
+			}
+
+			spin_unlock(&vnode->lock);
+			schedule();
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			spin_lock(&vnode->lock);
+		}
+
+		remove_wait_queue(&vnode->update_waitq,&myself);
+		spin_unlock(&vnode->lock);
+		set_current_state(TASK_RUNNING);
+		return vnode->flags & AFS_VNODE_DELETED ? -ENOENT : 0;
+	}
+
+ get_anyway:
+	/* okay... we're going to have to initiate the op */
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+
+	/* merge AFS status fetches and clear outstanding callback on this vnode */
+	do {
+		/* pick a server to query */
+		ret = afs_volume_pick_fileserver(vnode->volume,&server);
+		if (ret<0) {
+			/* no op was issued - withdraw our claim to be fetching the status and
+			 * wake the waiters so that they can make their own attempt */
+			spin_lock(&vnode->lock);
+			vnode->update_cnt--;
+			spin_unlock(&vnode->lock);
+			wake_up_all(&vnode->update_waitq);
+			_leave(" = %d",ret);
+			return ret;
+		}
+		_debug("USING SERVER: %08x\n",ntohl(server->addr.s_addr));
+		ret = afs_rxfs_fetch_file_status(server,vnode,NULL);
+	} while (!afs_volume_release_fileserver(vnode->volume,server,ret));
+
+	/* adjust the flags */
+	afs_vnode_finalise_status_update(vnode,server,ret);
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_vnode_fetch_status() */
+
+/*****************************************************************************/
+/*
+ * fetch file data from the volume (the op is also counted as a pending status update)
+ * - TODO implement caching and server failover
+ */
+int afs_vnode_fetch_data(afs_vnode_t *vnode, struct afs_rxfs_fetch_descriptor *desc)
+{
+	afs_server_t *server;
+	int ret;
+
+	_enter("%s,{%u,%u,%u}",vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,vnode->fid.vnode,vnode->fid.unique);
+
+	/* this op will fetch the status */
+	spin_lock(&vnode->lock);
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+
+	/* merge in AFS status fetches and clear outstanding callback on this vnode */
+	do {
+		/* pick a server to query */
+		ret = afs_volume_pick_fileserver(vnode->volume,&server);
+		if (ret<0) {
+			/* no op was issued - take back the count added above and rouse any waiters */
+			spin_lock(&vnode->lock);
+			vnode->update_cnt--;
+			spin_unlock(&vnode->lock);
+			wake_up_all(&vnode->update_waitq);
+			_leave(" = %d",ret);
+			return ret;
+		}
+		_debug("USING SERVER: %08x\n",ntohl(server->addr.s_addr));
+		ret = afs_rxfs_fetch_file_data(server,vnode,desc,NULL);
+	} while (!afs_volume_release_fileserver(vnode->volume,server,ret));
+
+	/* adjust the flags */
+	afs_vnode_finalise_status_update(vnode,server,ret);
+
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_vnode_fetch_data() */
+
+/*****************************************************************************/
+/*
+ * give up any callback promise currently held on a vnode
+ * - only relevant to the server that issued it, so nothing is sent if there's no such server
+ * - returns 0 if there was no promise, else the result of afs_rxfs_give_up_callback()
+ */
+int afs_vnode_give_up_callback(afs_vnode_t *vnode)
+{
+	afs_server_t *promiser;
+	int ret = 0;
+
+	_enter("%s,{%u,%u,%u}",
+	       vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,vnode->fid.vnode,vnode->fid.unique);
+
+	/* take the vnode out of the master callback hash */
+	spin_lock(&afs_cb_hash_lock);
+	list_del_init(&vnode->cb_hash_link);
+	spin_unlock(&afs_cb_hash_lock);
+
+	/* cancel the expiry timer and detach from the promising server under the vnode lock */
+	spin_lock(&vnode->lock);
+	afs_kafstimod_del_timer(&vnode->cb_timeout);
+
+	promiser = xchg(&vnode->cb_server,NULL);
+	if (promiser) {
+		/* set the changed flag as the promise is being given up */
+		vnode->flags |= AFS_VNODE_CHANGED;
+		spin_lock(&promiser->cb_lock);
+		list_del_init(&vnode->cb_link);
+		spin_unlock(&promiser->cb_lock);
+	}
+	spin_unlock(&vnode->lock);
+
+	if (!promiser) {
+		_leave(" = %d",ret);
+		return ret;
+	}
+
+	/* tell the server, then release the server record taken from vnode->cb_server */
+	ret = afs_rxfs_give_up_callback(promiser,vnode);
+	afs_put_server(promiser);
+	_leave(" = %d",ret);
+	return ret;
+} /* end afs_vnode_give_up_callback() */
diff --git a/fs/afs/vnode.h b/fs/afs/vnode.h
new file mode 100644
index 000000000000..ec2c412c4214
--- /dev/null
+++ b/fs/afs/vnode.h
@@ -0,0 +1,88 @@
+/* vnode.h: AFS vnode record
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_VNODE_H
+#define _LINUX_AFS_VNODE_H
+
+#include <linux/fs.h>
+#include <linux/version.h>
+#include "server.h"
+#include "kafstimod.h"
+
+#ifdef __KERNEL__
+
+struct afs_rxfs_fetch_descriptor;
+
+/*****************************************************************************/
+/*
+ * AFS inode private data
+ */
+struct afs_vnode
+{
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	struct inode		vfs_inode;	/* the VFS's inode record */
+#else
+	struct inode		*inode;		/* the VFS's inode */
+#endif
+
+	afs_volume_t		*volume;	/* volume on which vnode resides */
+	afs_fid_t		fid;		/* the file identifier for this inode */
+	afs_file_status_t	status;		/* AFS status info for this file */
+	unsigned		nix;		/* vnode index in cache */
+
+	wait_queue_head_t	update_waitq;	/* status fetch waitqueue */
+	unsigned		update_cnt;	/* number of outstanding ops that will update the
+						 * status */
+	spinlock_t		lock;		/* waitqueue/flags lock */
+	unsigned		flags;		/* AFS_VNODE_* bits defined below */
+#define AFS_VNODE_CHANGED	0x00000001	/* set if vnode reported changed by callback */
+#define AFS_VNODE_DELETED	0x00000002	/* set if vnode deleted on server */
+#define AFS_VNODE_MOUNTPOINT	0x00000004	/* set if vnode is a mountpoint symlink */
+
+	/* outstanding callback notification on this file */
+	afs_server_t		*cb_server;	/* server that made the current promise */
+	struct list_head	cb_link;	/* link in server's promises list */
+	struct list_head	cb_hash_link;	/* link in master callback hash */
+	afs_timer_t		cb_timeout;	/* timeout on promise */
+	unsigned		cb_version;	/* callback version */
+	unsigned		cb_expiry;	/* callback expiry time */
+	afs_callback_type_t	cb_type;	/* type of callback */
+};
+
+static inline afs_vnode_t *AFS_FS_I(struct inode *inode)	/* map a VFS inode to its AFS vnode */
+{
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	return list_entry(inode,afs_vnode_t,vfs_inode);	/* inode is embedded: step back to the containing vnode */
+#else
+	return inode->u.generic_ip;	/* vnode pointer is kept in the inode's generic_ip slot */
+#endif
+}
+
+static inline struct inode *AFS_VNODE_TO_I(afs_vnode_t *vnode)	/* inverse of AFS_FS_I(): vnode to VFS inode */
+{
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	return &vnode->vfs_inode;	/* the inode lives inside the vnode */
+#else
+	return vnode->inode;		/* the vnode only holds a pointer to it */
+#endif
+}
+
+extern int afs_vnode_fetch_status(afs_vnode_t *vnode);
+
+extern int afs_vnode_fetch_data(afs_vnode_t *vnode, struct afs_rxfs_fetch_descriptor *desc);
+
+extern int afs_vnode_give_up_callback(afs_vnode_t *vnode);
+
+extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_AFS_VNODE_H */
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
new file mode 100644
index 000000000000..198c355c715b
--- /dev/null
+++ b/fs/afs/volume.c
@@ -0,0 +1,430 @@
+/* volume.c: AFS volume management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "volume.h"
+#include "cell.h"
+#include "cmservice.h"
+#include "fsclient.h"
+#include "vlclient.h"
+#include "internal.h"
+
+const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
+
+/*****************************************************************************/
+/*
+ * lookup a volume by name
+ * - this can be one of the following:
+ *	"%[cell:]volume[.]"		R/W volume
+ *	"#[cell:]volume[.]"		R/O or R/W volume (rwparent=0), or R/W (rwparent=1) volume
+ *	"%[cell:]volume.readonly"	R/O volume
+ *	"#[cell:]volume.readonly"	R/O volume
+ *	"%[cell:]volume.backup"		Backup volume
+ *	"#[cell:]volume.backup"		Backup volume
+ *
+ * The cell name is optional, and defaults to the current cell.
+ *
+ * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin Guide
+ * - Rule 1: Explicit type suffix forces access of that type or nothing
+ *           (no suffix, then use Rule 2 & 3)
+ * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W if not available
+ * - Rule 3: If parent volume is R/W, then only mount R/W volume unless explicitly told otherwise
+ */
+int afs_volume_lookup(char *name, int rwparent, afs_volume_t **_volume)
+{
+	afs_vlocation_t *vlocation = NULL;
+	afs_voltype_t type;
+	afs_volume_t *volume = NULL;
+	afs_cell_t *cell = NULL;
+	char *cellname, *volname, *suffix;
+	char srvtmask;
+	int force, ret, loop;
+
+	_enter(",%s,",name);
+
+	if (!name || (name[0]!='%' && name[0]!='#') || !name[1]) {
+		printk("kAFS: unparsable volume name\n");
+		return -EINVAL;
+	}
+
+	/* determine the type of volume we're looking for */
+	force = 0;
+	type = AFSVL_ROVOL;
+
+	if (rwparent || name[0]=='%') {	/* Rule 3 / '%' prefix: insist on the R/W volume */
+		type = AFSVL_RWVOL;
+		force = 1;
+	}
+
+	suffix = strrchr(name,'.');
+	if (suffix) {
+		if (strcmp(suffix,".readonly")==0) {	/* Rule 1: explicit suffix forces the type */
+			type = AFSVL_ROVOL;
+			force = 1;
+		}
+		else if (strcmp(suffix,".backup")==0) {
+			type = AFSVL_BACKVOL;
+			force = 1;
+		}
+		else if (suffix[1]==0) {	/* bare trailing '.': cut it off the caller's string for good */
+			*suffix = 0;
+			suffix = NULL;
+		}
+		else {	/* some other dot: not a type suffix */
+			suffix = NULL;
+		}
+	}
+
+	/* split the cell and volume names */
+	name++;	/* step over the '%' or '#' */
+	volname = strchr(name,':');
+	if (volname) {
+		*volname++ = 0;	/* temporarily terminate the cell name in place */
+		cellname = name;
+	}
+	else {
+		volname = name;
+		cellname = NULL;	/* NULL is passed on to afs_cell_lookup() as-is */
+	}
+
+	_debug("CELL:%s VOLUME:%s SUFFIX:%s TYPE:%d%s",
+	       cellname,volname,suffix?:"-",type,force?" FORCE":"");
+
+	/* lookup the cell record */
+	ret = afs_cell_lookup(cellname,&cell);
+	if (ret<0)
+		printk("kAFS: unable to lookup cell '%s'\n",cellname?:"");
+
+	if (cellname) volname[-1] = ':';	/* put back the ':' overwritten above */
+	if (ret<0)
+		goto error;
+
+	/* lookup the volume location record */
+	if (suffix) *suffix = 0;	/* hide the type suffix for the duration of the lookup */
+	ret = afs_vlocation_lookup(cell,volname,&vlocation);
+	if (suffix) *suffix = '.';	/* and restore it */
+	if (ret<0)
+		goto error;
+
+	/* make the final decision on the type we want */
+	ret = -ENOMEDIUM;	/* result of every "goto error" until vl_sem is taken */
+	if (force && !(vlocation->vldb.vidmask & (1<<type)))
+		goto error;
+
+	srvtmask = 0;	/* union of the type masks of all listed servers */
+	for (loop=0; loop<vlocation->vldb.nservers; loop++)
+		srvtmask |= vlocation->vldb.srvtmask[loop];
+
+	if (force) {
+		if (!(srvtmask & (1 <<type)))
+			goto error;
+	}
+	else if (srvtmask & AFSC_VOL_STM_RO) {	/* Rule 2: prefer R/O when not forced */
+		type = AFSVL_ROVOL;
+	}
+	else if (srvtmask & AFSC_VOL_STM_RW) {	/* Rule 2: fall back to R/W */
+		type = AFSVL_RWVOL;
+	}
+	else {
+		goto error;
+	}
+
+	down_write(&cell->vl_sem);	/* afs_put_volume() clears vols[] under vl_sem too */
+
+	/* is the volume already active? */
+	if (vlocation->vols[type]) {
+		/* yes - re-use it */
+		volume = vlocation->vols[type];
+		afs_get_volume(volume);
+		goto success;
+	}
+
+	/* create a new volume record */
+	_debug("creating new volume record");
+
+	ret = -ENOMEM;
+	volume = kmalloc(sizeof(afs_volume_t),GFP_KERNEL);
+	if (!volume)
+		goto error_up;
+
+	memset(volume,0,sizeof(afs_volume_t));
+	atomic_set(&volume->usage,1);	/* this reference is handed to the caller via *_volume */
+	volume->type = type;
+	volume->type_force = force;
+	volume->cell = cell;
+	volume->vid = vlocation->vldb.vid[type];
+
+	init_rwsem(&volume->server_sem);
+
+	/* look up all the applicable server records */
+	for (loop=0; loop<8; loop++) {	/* 8 matches the size of volume->servers[] */
+		if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
+			ret = afs_server_lookup(volume->cell,
+						&vlocation->vldb.servers[loop],
+						&volume->servers[volume->nservers]);
+			if (ret<0)
+				goto error_discard;
+
+			volume->nservers++;
+		}
+	}
+
+	/* attach the cache and volume location */
+#if 0
+	afs_get_cache(cache);		volume->cache = cache;
+#endif
+	afs_get_vlocation(vlocation);	volume->vlocation = vlocation;
+
+	vlocation->vols[type] = volume;
+
+ success:
+	_debug("kAFS selected %s volume %08x",afs_voltypes[volume->type],volume->vid);
+	*_volume = volume;
+	ret = 0;
+
+	/* clean up (the success path falls through here with ret == 0) */
+ error_up:
+	up_write(&cell->vl_sem);
+ error:
+	if (vlocation)	afs_put_vlocation(vlocation);	/* drops the lookup's own references */
+	if (cell)	afs_put_cell(cell);
+
+	_leave(" = %d (%p)",ret,volume);
+	return ret;
+
+ error_discard:	/* a server lookup failed: undo the half-built volume */
+	up_write(&cell->vl_sem);
+
+	for (loop=volume->nservers-1; loop>=0; loop--)
+		if (volume->servers[loop])
+			afs_put_server(volume->servers[loop]);
+
+	kfree(volume);
+	goto error;
+} /* end afs_volume_lookup() */
+
+/*****************************************************************************/
+/*
+ * destroy a volume record
+ */
+void afs_put_volume(afs_volume_t *volume)
+{
+	afs_vlocation_t *vlocation;
+	int loop;
+
+	_enter("%p",volume);
+
+	vlocation = volume->vlocation;
+
+	/* sanity check */
+	if (atomic_read(&volume->usage)<=0)
+		BUG();
+
+	/* to prevent a race, the decrement and the dequeue must be effectively atomic */
+	down_write(&vlocation->cell->vl_sem);
+
+	if (likely(!atomic_dec_and_test(&volume->usage))) {	/* other references remain */
+		up_write(&vlocation->cell->vl_sem);
+		_leave("");
+		return;
+	}
+
+	vlocation->vols[volume->type] = NULL;	/* last ref gone: afs_volume_lookup() can no longer find it */
+
+	up_write(&vlocation->cell->vl_sem);
+
+	afs_put_vlocation(vlocation);	/* balances afs_get_vlocation() in afs_volume_lookup() */
+
+	/* finish cleaning up the volume */
+#if 0
+	if (volume->cache)	afs_put_cache(volume->cache);
+#endif
+
+	for (loop=volume->nservers-1; loop>=0; loop--)	/* drop the refs held by the server list */
+		if (volume->servers[loop])
+			afs_put_server(volume->servers[loop]);
+
+	kfree(volume);
+
+	_leave(" [destroyed]");
+} /* end afs_put_volume() */
+
+/*****************************************************************************/
+/*
+ * pick a server to use to try accessing this volume
+ * - returns with an elevated usage count on the server chosen
+ */
+int afs_volume_pick_fileserver(afs_volume_t *volume, afs_server_t **_server)
+{
+	afs_server_t *server;
+	int ret, state, loop;
+
+	_enter("%s",volume->vlocation->vldb.name);
+
+	down_read(&volume->server_sem);
+
+	/* handle the no-server case */
+	if (volume->nservers==0) {
+		ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;	/* -ENOMEDIUM if servers were discarded for rejecting the volume */
+		up_read(&volume->server_sem);
+		_leave(" = %d [no servers]",ret);
+		return ret;
+	}
+
+	/* basically, just search the list for the first live server and use that */
+	ret = 0;	/* holds the highest-ranked failure state seen so far */
+	for (loop=0; loop<volume->nservers; loop++) {
+		server = volume->servers[loop];
+		state = server->fs_state;
+
+		switch (state) {
+			/* found an apparently healthy server */
+		case 0:
+			afs_get_server(server);	/* the caller owns this ref until it releases the server */
+			up_read(&volume->server_sem);
+			*_server = server;
+			_leave(" = 0 (picked %08x)",ntohl(server->addr.s_addr));
+			return 0;
+
+		case -ENETUNREACH:	/* lowest rank: only recorded if nothing else was seen */
+			if (ret==0)
+				ret = state;
+			break;
+
+		case -EHOSTUNREACH:	/* outranks -ENETUNREACH */
+			if (ret==0 || ret==-ENETUNREACH)
+				ret = state;
+			break;
+
+		case -ECONNREFUSED:	/* outranks both unreachable errors */
+			if (ret==0 || ret==-ENETUNREACH || ret==-EHOSTUNREACH)
+				ret = state;
+			break;
+
+		default:	/* any other state outranks the three errors above */
+		case -EREMOTEIO:
+			if (ret==0 ||
+			    ret==-ENETUNREACH ||
+			    ret==-EHOSTUNREACH ||
+			    ret==-ECONNREFUSED)
+				ret = state;
+			break;
+		}
+	}
+
+	/* no available servers
+	 * - TODO: handle the no active servers case better
+	 */
+	up_read(&volume->server_sem);
+	_leave(" = %d",ret);
+	return ret;	/* *_server is left untouched on this path */
+} /* end afs_volume_pick_fileserver() */
+
+/*****************************************************************************/
+/*
+ * release a server after use
+ * - releases the ref on the server struct that was acquired by picking
+ * - records result of using a particular server to access a volume
+ * - return 0 to try again, 1 if okay or to issue error
+ */
+int afs_volume_release_fileserver(afs_volume_t *volume, afs_server_t *server, int result)
+{
+	unsigned loop;
+
+	_enter("%s,%08x,%d",volume->vlocation->vldb.name,ntohl(server->addr.s_addr),result);
+
+	switch (result) {
+		/* success */
+	case 0:
+		server->fs_act_jif = jiffies;
+		break;
+
+		/* the fileserver denied all knowledge of the volume */
+	case -ENOMEDIUM:
+		server->fs_act_jif = jiffies;
+		down_write(&volume->server_sem);
+
+		/* first, find where the server is in the active list (if it is) */
+		for (loop=0; loop<volume->nservers; loop++)
+			if (volume->servers[loop]==server)
+				goto present;
+
+		/* no longer there - may have been discarded by another op */
+		goto try_next_server_upw;
+
+	present:
+		volume->nservers--;	/* remove slot 'loop' and close the gap */
+		memmove(&volume->servers[loop],
+			&volume->servers[loop+1],
+			sizeof(volume->servers[loop]) * (volume->nservers - loop)
+			);
+		volume->servers[volume->nservers] = NULL;
+		afs_put_server(server);	/* drops the ref the volume's server list was holding */
+		volume->rjservers++;
+
+		if (volume->nservers>0)
+			/* another server might acknowledge its existence */
+			goto try_next_server_upw;
+
+		/* handle the case where all the fileservers have rejected the volume
+		 * - TODO: try asking the fileservers for volume information
+		 * - TODO: contact the VL server again to see if the volume is no longer registered
+		 */
+		up_write(&volume->server_sem);
+		afs_put_server(server);	/* drops the ref acquired by picking */
+		_leave(" [completely rejected]");
+		return 1;
+
+		/* problem reaching the server */
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -ECONNREFUSED:
+	case -ETIMEDOUT:
+	case -EREMOTEIO:
+		/* mark the server as dead
+		 * TODO: vary dead timeout depending on error
+		 */
+		spin_lock(&server->fs_lock);
+		if (!server->fs_state) {	/* an already-recorded failure state is left alone */
+			server->fs_dead_jif = jiffies + HZ * 10;	/* i.e. ten seconds from now */
+			server->fs_state = result;
+			printk("kAFS: SERVER DEAD state=%d\n",result);
+		}
+		spin_unlock(&server->fs_lock);
+		goto try_next_server;
+
+		/* miscellaneous error */
+	default:
+		server->fs_act_jif = jiffies;	/* fall through */
+	case -ENOMEM:
+	case -ENONET:
+		break;
+	}
+
+	/* tell the caller to accept the result */
+	afs_put_server(server);	/* drops the ref acquired by picking */
+	_leave("");
+	return 1;
+
+	/* tell the caller to loop around and try the next server */
+ try_next_server_upw:
+	up_write(&volume->server_sem);
+ try_next_server:
+	afs_put_server(server);	/* drops the ref acquired by picking */
+	_leave(" [try next server]");
+	return 0;
+
+} /* end afs_volume_release_fileserver() */
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
new file mode 100644
index 000000000000..1842d983677f
--- /dev/null
+++ b/fs/afs/volume.h
@@ -0,0 +1,92 @@
+/* volume.h: AFS volume management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_AFS_VOLUME_H
+#define _LINUX_AFS_VOLUME_H
+
+#include "types.h"
+#include "fsclient.h"
+#include "kafstimod.h"
+#include "kafsasyncd.h"
+#include "cache-layout.h"
+
+#define __packed __attribute__((packed))
+
+typedef enum {
+	AFS_VLUPD_SLEEP,		/* sleeping waiting for update timer to fire */
+	AFS_VLUPD_PENDING,		/* on pending queue */
+	AFS_VLUPD_INPROGRESS,		/* op in progress */
+	AFS_VLUPD_BUSYSLEEP,		/* sleeping because server returned EBUSY */
+	/* NOTE(review): packed, presumably to keep the upd_state field of struct afs_vlocation small - confirm */
+} __attribute__((packed)) afs_vlocation_upd_t;
+
+/*****************************************************************************/
+/*
+ * AFS volume location record
+ */
+struct afs_vlocation
+{
+	atomic_t		usage;		/* reference count (see afs_get_vlocation) */
+	struct list_head	link;		/* link in cell volume location list */
+	afs_timer_t		timeout;	/* decaching timer */
+	afs_cell_t		*cell;		/* cell to which volume belongs */
+	struct list_head	caches;		/* backing caches */
+	afsc_vldb_record_t	vldb;		/* volume information DB record */
+	struct afs_volume	*vols[3];	/* volume access record pointer (index by type) */
+	rwlock_t		lock;		/* access lock */
+	unsigned long		read_jif;	/* time at which last read from vlserver */
+	afs_timer_t		upd_timer;	/* update timer */
+	afs_async_op_t		upd_op;		/* update operation */
+	afs_vlocation_upd_t	upd_state;	/* update state */
+	unsigned short		upd_first_svix;	/* first server index during update */
+	unsigned short		upd_curr_svix;	/* current server index during update */
+	unsigned short		upd_rej_cnt;	/* ENOMEDIUM count during update */
+	unsigned short		upd_busy_cnt;	/* EBUSY count during update */
+	unsigned short		valid;		/* T if valid */
+};
+
+extern int afs_vlocation_lookup(afs_cell_t *cell, const char *name, afs_vlocation_t **_vlocation);
+
+#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
+
+extern void __afs_put_vlocation(afs_vlocation_t *vlocation);
+extern void afs_put_vlocation(afs_vlocation_t *vlocation);
+extern void afs_vlocation_do_timeout(afs_vlocation_t *vlocation);
+
+/*****************************************************************************/
+/*
+ * AFS volume access record
+ */
+struct afs_volume
+{
+	atomic_t		usage;		/* reference count (afs_get_volume / afs_put_volume) */
+	afs_cell_t		*cell;		/* cell to which belongs (unrefd ptr) */
+	afs_vlocation_t		*vlocation;	/* volume location */
+	afs_volid_t		vid;		/* volume ID */
+	afs_voltype_t __packed	type;		/* type of volume */
+	char			type_force;	/* force volume type (suppress R/O -> R/W) */
+	unsigned short		nservers;	/* number of server slots filled */
+	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
+	afs_server_t		*servers[8];	/* servers on which volume resides (ordered) */
+	struct rw_semaphore	server_sem;	/* lock for accessing current server */
+};
+
+extern int afs_volume_lookup(char *name, int rwparent, afs_volume_t **_volume);	/* rwparent != 0: parent volume is R/W, so only mount R/W */
+
+#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
+
+extern void afs_put_volume(afs_volume_t *volume);
+
+extern int afs_volume_pick_fileserver(afs_volume_t *volume, afs_server_t **_server);
+
+extern int afs_volume_release_fileserver(afs_volume_t *volume, afs_server_t *server, int result);
+
+#endif /* _LINUX_AFS_VOLUME_H */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 33fc669b7842..1ad7f467993b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -526,8 +526,6 @@ int check_disk_change(struct block_device *bdev)
 {
 	struct block_device_operations * bdops = bdev->bd_op;
 	kdev_t dev = to_kdev_t(bdev->bd_dev);
-	struct gendisk *disk;
-	int part;
 
 	if (bdops->check_media_change == NULL)
 		return 0;
@@ -537,10 +535,9 @@ int check_disk_change(struct block_device *bdev)
 	if (invalidate_device(dev, 0))
 		printk("VFS: busy inodes on changed media.\n");
 
-	disk = get_gendisk(bdev->bd_dev, &part);
 	if (bdops->revalidate)
 		bdops->revalidate(dev);
-	if (disk && disk->minor_shift)
+	if (bdev->bd_disk->minors > 1)
 		bdev->bd_invalidated = 1;
 	return 1;
 }
@@ -548,12 +545,11 @@ int check_disk_change(struct block_device *bdev)
 int full_check_disk_change(struct block_device *bdev)
 {
 	int res = 0;
-	int n;
 	if (bdev->bd_contains != bdev)
 		BUG();
 	down(&bdev->bd_sem);
 	if (check_disk_change(bdev)) {
-		rescan_partitions(get_gendisk(bdev->bd_dev, &n), bdev);
+		rescan_partitions(bdev->bd_disk, bdev);
 		res = 1;
 	}
 	up(&bdev->bd_sem);
@@ -595,6 +591,8 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 	kdev_t dev = to_kdev_t(bdev->bd_dev);
 	struct module *owner = NULL;
 	struct block_device_operations *ops, *old;
+	struct gendisk *disk;
+	int part;
 
 	lock_kernel();
 	ops = get_blkfops(major(dev));
@@ -614,33 +612,32 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 		if (owner)
 			__MOD_DEC_USE_COUNT(owner);
 	}
+	disk = get_gendisk(bdev->bd_dev, &part);
+	if (!disk)
+		goto out1;
 	if (!bdev->bd_contains) {
-		int part;
-		struct gendisk *g = get_gendisk(bdev->bd_dev, &part);
 		bdev->bd_contains = bdev;
-		if (g && part) {
-			struct block_device *disk;
-			disk = bdget(MKDEV(g->major, g->first_minor));
+		if (part) {
+			struct block_device *whole;
+			whole = bdget(MKDEV(disk->major, disk->first_minor));
 			ret = -ENOMEM;
-			if (!disk)
+			if (!whole)
 				goto out1;
-			ret = blkdev_get(disk, file->f_mode, file->f_flags, BDEV_RAW);
+			ret = blkdev_get(whole, file->f_mode, file->f_flags, BDEV_RAW);
 			if (ret)
 				goto out1;
-			bdev->bd_contains = disk;
+			bdev->bd_contains = whole;
 		}
 	}
 	if (bdev->bd_contains == bdev) {
-		int part;
-		struct gendisk *g = get_gendisk(bdev->bd_dev, &part);
-
+		if (!bdev->bd_openers)
+			bdev->bd_disk = disk;
 		if (!bdev->bd_queue) {
 			struct blk_dev_struct *p = blk_dev + major(dev);
 			bdev->bd_queue = &p->request_queue;
 			if (p->queue)
 				bdev->bd_queue =  p->queue(dev);
 		}
-
 		if (bdev->bd_op->open) {
 			ret = bdev->bd_op->open(inode, file);
 			if (ret)
@@ -648,12 +645,8 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 		}
 		if (!bdev->bd_openers) {
 			struct backing_dev_info *bdi;
-			sector_t sect = 0;
-
 			bdev->bd_offset = 0;
-			if (g)
-				sect = get_capacity(g);
-			bd_set_size(bdev, (loff_t)sect << 9);
+			bd_set_size(bdev, (loff_t)get_capacity(disk) << 9);
 			bdi = blk_get_backing_dev_info(bdev);
 			if (bdi == NULL)
 				bdi = &default_backing_dev_info;
@@ -661,19 +654,17 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 			bdev->bd_inode->i_data.backing_dev_info = bdi;
 		}
 		if (bdev->bd_invalidated)
-			rescan_partitions(g, bdev);
+			rescan_partitions(disk, bdev);
 	} else {
 		down(&bdev->bd_contains->bd_sem);
 		bdev->bd_contains->bd_part_count++;
 		if (!bdev->bd_openers) {
-			int part;
-			struct gendisk *g = get_gendisk(bdev->bd_dev, &part);
 			struct hd_struct *p;
-			p = g->part + part - 1;
+			p = disk->part + part - 1;
 			inode->i_data.backing_dev_info =
 			   bdev->bd_inode->i_data.backing_dev_info =
 			   bdev->bd_contains->bd_inode->i_data.backing_dev_info;
-			if (!p->nr_sects) {
+			if (!(disk->flags & GENHD_FL_UP) || !p->nr_sects) {
 				bdev->bd_contains->bd_part_count--;
 				up(&bdev->bd_contains->bd_sem);
 				ret = -ENXIO;
@@ -682,10 +673,12 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 			bdev->bd_queue = bdev->bd_contains->bd_queue;
 			bdev->bd_offset = p->start_sect;
 			bd_set_size(bdev, (loff_t) p->nr_sects << 9);
+			bdev->bd_disk = disk;
 		}
 		up(&bdev->bd_contains->bd_sem);
 	}
-	bdev->bd_openers++;
+	if (bdev->bd_openers++)
+		put_disk(disk);
 	up(&bdev->bd_sem);
 	unlock_kernel();
 	return 0;
@@ -699,6 +692,7 @@ out2:
 		}
 	}
 out1:
+	put_disk(disk);
 	if (!old) {
 		bdev->bd_op = NULL;
 		if (owner)
@@ -772,15 +766,18 @@ int blkdev_put(struct block_device *bdev, int kind)
 		up(&bdev->bd_contains->bd_sem);
 	}
 	if (!bdev->bd_openers) {
+		struct gendisk *disk = bdev->bd_disk;
 		if (bdev->bd_op->owner)
 			__MOD_DEC_USE_COUNT(bdev->bd_op->owner);
 		bdev->bd_op = NULL;
 		bdev->bd_queue = NULL;
+		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 		if (bdev != bdev->bd_contains) {
 			blkdev_put(bdev->bd_contains, BDEV_RAW);
 			bdev->bd_contains = NULL;
 		}
+		put_disk(disk);
 	}
 	unlock_kernel();
 	up(&bdev->bd_sem);
@@ -793,25 +790,6 @@ int blkdev_close(struct inode * inode, struct file * filp)
 	return blkdev_put(inode->i_bdev, BDEV_FILE);
 }
 
-static int blkdev_reread_part(struct block_device *bdev)
-{
-	int part;
-	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
-	int res = 0;
-
-	if (!disk || !disk->minor_shift || bdev != bdev->bd_contains)
-		return -EINVAL;
-	if (part)
-		BUG();
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-	if (down_trylock(&bdev->bd_sem))
-		return -EBUSY;
-	res = rescan_partitions(disk, bdev);
-	up(&bdev->bd_sem);
-	return res;
-}
-
 static ssize_t blkdev_file_write(struct file *file, const char *buf,
 				   size_t count, loff_t *ppos)
 {
@@ -820,51 +798,6 @@ static ssize_t blkdev_file_write(struct file *file, const char *buf,
 	return generic_file_write_nolock(file, &local_iov, 1, ppos);
 }
 
-static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
-			unsigned long arg)
-{
-	struct block_device *bdev = inode->i_bdev;
-	int ret = -EINVAL;
-	switch (cmd) {
-	/*
-	 * deprecated, use the /proc/iosched interface instead
-	 */
-	case BLKELVGET:
-	case BLKELVSET:
-		ret = -ENOTTY;
-		break;
-	case BLKRAGET:
-	case BLKROGET:
-	case BLKBSZGET:
-	case BLKSSZGET:
-	case BLKFRAGET:
-	case BLKSECTGET:
-	case BLKRASET:
-	case BLKFRASET:
-	case BLKBSZSET:
-	case BLKPG:
-		ret = blk_ioctl(bdev, cmd, arg);
-		break;
-	case BLKRRPART:
-		ret = blkdev_reread_part(bdev);
-		break;
-	default:
-		if (bdev->bd_op->ioctl)
-			ret =bdev->bd_op->ioctl(inode, file, cmd, arg);
-		if (ret == -EINVAL) {
-			switch (cmd) {
-				case BLKGETSIZE:
-				case BLKGETSIZE64:
-				case BLKFLSBUF:
-				case BLKROSET:
-					ret = blk_ioctl(bdev,cmd,arg);
-					break;
-			}
-		}
-	}
-	return ret;
-}
-
 struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.writepage	= blkdev_writepage,
diff --git a/fs/buffer.c b/fs/buffer.c
index d024b78c3e60..35d43421c3a8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -811,6 +811,13 @@ int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 			if (buffer_dirty(bh)) {
 				get_bh(bh);
 				spin_unlock(lock);
+				/*
+				 * Ensure any pending I/O completes so that
+				 * ll_rw_block() actually writes the current
+				 * contents - it is a noop if I/O is still in
+				 * flight on potentially older contents.
+				 */
+				wait_on_buffer(bh);
 				ll_rw_block(WRITE, 1, &bh);
 				brelse(bh);
 				spin_lock(lock);
diff --git a/fs/dcache.c b/fs/dcache.c
index ef0871dbcdb2..d0fcfeba16ee 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -637,6 +637,7 @@ struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
 	dentry->d_mounted = 0;
+	dentry->d_cookie = NULL;
 	INIT_LIST_HEAD(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
diff --git a/fs/dcookies.c b/fs/dcookies.c
new file mode 100644
index 000000000000..d589103eb820
--- /dev/null
+++ b/fs/dcookies.c
@@ -0,0 +1,323 @@
+/*
+ * dcookies.c
+ *
+ * Copyright 2002 John Levon <levon@movementarian.org>
+ *
+ * Persistent cookie-path mappings. These are used by
+ * profilers to convert a per-task EIP value into something
+ * non-transitory that can be processed at a later date.
+ * This is done by locking the dentry/vfsmnt pair in the
+ * kernel until released by the tasks needing the persistent
+ * objects. The tag is simply an u32 that refers
+ * to the pair and can be looked up from userspace.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/dcache.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/dcookies.h>
+#include <asm/uaccess.h>
+
+/* The dcookies are allocated from a kmem_cache and
+ * hashed onto a small number of lists. None of the
+ * code here is particularly performance critical
+ */
+struct dcookie_struct {
+	struct dentry * dentry;		/* pinned dentry; its address is the cookie value */
+	struct vfsmount * vfsmnt;	/* pinned mount paired with the dentry */
+	struct list_head hash_list;	/* link in a dcookie_hashtable bucket */
+};
+
+static LIST_HEAD(dcookie_users);
+static DECLARE_MUTEX(dcookie_sem);
+static kmem_cache_t * dcookie_cache;
+static struct list_head * dcookie_hashtable;
+static size_t hash_size;
+
+static inline int is_live(void)	/* 1 while at least one dcookie user is registered, else 0 */
+{
+	return list_empty(&dcookie_users) ? 0 : 1;
+}
+
+
+/* The dentry is locked, its address will do for the cookie */
+static inline u32 dcookie_value(struct dcookie_struct * dcs)
+{
+	return (u32)(unsigned long)dcs->dentry;	/* via unsigned long so the narrowing is explicit; only the low 32 bits survive on 64-bit */
+}
+
+
+static size_t dcookie_hash(u32 dcookie)
+{
+	return (hash_size - 1) & (dcookie >> 2);	/* hash_size is a power of two, so this is a bucket mask */
+}
+
+
+static struct dcookie_struct * find_dcookie(u32 dcookie)
+{
+	struct list_head * bucket = dcookie_hashtable + dcookie_hash(dcookie);
+	struct list_head * cursor;
+
+	/*
+	 * Only one bucket can hold a given cookie value, so scan
+	 * just that list and stop at the first match.
+	 */
+	list_for_each(cursor, bucket) {
+		struct dcookie_struct * candidate =
+			list_entry(cursor, struct dcookie_struct, hash_list);
+
+		if (dcookie_value(candidate) == dcookie)
+			return candidate;
+	}
+
+	return NULL;	/* nothing hashed under this value */
+}
+
+
+static void hash_dcookie(struct dcookie_struct * dcs)
+{
+	size_t bucket = dcookie_hash(dcookie_value(dcs));	/* bucket is chosen from the cookie value */
+	list_add(&dcs->hash_list, dcookie_hashtable + bucket);
+}
+
+
+static struct dcookie_struct * alloc_dcookie(struct dentry * dentry,
+	struct vfsmount * vfsmnt)
+{
+	struct dcookie_struct * dcs = kmem_cache_alloc(dcookie_cache, GFP_KERNEL);
+	if (!dcs)
+		return NULL;	/* allocation failed; nothing has been pinned yet */
+
+	atomic_inc(&dentry->d_count);	/* pin the pair; undone by dput()/mntput() in free_dcookie() */
+	atomic_inc(&vfsmnt->mnt_count);
+	dentry->d_cookie = dcs;	/* lets get_dcookie() reuse this cookie for the same dentry */
+
+	dcs->dentry = dentry;
+	dcs->vfsmnt = vfsmnt;
+	hash_dcookie(dcs);	/* must follow dcs->dentry: the hash is derived from it */
+
+	return dcs;
+}
+
+
+/* This is the main kernel-side routine that retrieves the cookie
+ * value for a dentry/vfsmnt pair.
+ */
+int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
+	u32 * cookie)
+{
+	int err = 0;
+	struct dcookie_struct * dcs;
+
+	down(&dcookie_sem);
+
+	if (!is_live()) {	/* no registered user: cookies are not being handed out */
+		err = -EINVAL;
+		goto out;
+	}
+
+	dcs = dentry->d_cookie;	/* reuse the cookie if this dentry already has one */
+
+	if (!dcs)
+		dcs = alloc_dcookie(dentry, vfsmnt);
+
+	if (!dcs) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	*cookie = dcookie_value(dcs);	/* only written on success */
+
+out:
+	up(&dcookie_sem);
+	return err;	/* 0, -EINVAL or -ENOMEM */
+}
+
+
+/* And here is where the userspace process can look up the cookie value
+ * to retrieve the path.
+ */
+asmlinkage int sys_lookup_dcookie(u32 cookie, char * buf, size_t len)
+{
+	char * kbuf;
+	char * path;
+	int err = -EINVAL;
+	size_t pathlen;
+	struct dcookie_struct * dcs;
+
+	/* we could leak path information to users
+	 * without dir read permission without this
+	 */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	down(&dcookie_sem);
+
+	if (!is_live()) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(dcs = find_dcookie(cookie)))	/* unknown cookie: err is still -EINVAL */
+		goto out;
+
+	err = -ENOMEM;
+	kbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);	/* scratch page for d_path() */
+	if (!kbuf)
+		goto out;
+	memset(kbuf, 0, PAGE_SIZE);
+
+	/* FIXME: (deleted) ? */
+	path = d_path(dcs->dentry, dcs->vfsmnt, kbuf, PAGE_SIZE);
+
+	err = 0;
+
+	pathlen = kbuf + PAGE_SIZE - path;	/* assumes d_path() placed the string at the tail of kbuf */
+	if (len > pathlen)
+		len = pathlen;	/* never copy past the end of the scratch page */
+
+	if (copy_to_user(buf, path, len))	/* NOTE(review): a 'len' below pathlen gives a truncated copy, and success returns 0, not a length */
+		err = -EFAULT;
+
+	kfree(kbuf);
+out:
+	up(&dcookie_sem);
+	return err;
+}
+
+
+static int dcookie_init(void)
+{
+	struct list_head * d;
+	unsigned int i, hash_bits;
+	int err = -ENOMEM;	/* result of both allocation-failure paths */
+
+	dcookie_cache = kmem_cache_create("dcookie_cache",
+		sizeof(struct dcookie_struct),
+		0, 0, NULL, NULL);
+
+	if (!dcookie_cache)
+		goto out;
+
+	dcookie_hashtable = kmalloc(PAGE_SIZE, GFP_KERNEL);	/* one page of bucket heads */
+	if (!dcookie_hashtable)
+		goto out_kmem;
+
+	err = 0;	/* nothing below can fail */
+
+	/*
+	 * Find the power-of-two list-heads that can fit into the allocation..
+	 * We don't guarantee that "sizeof(struct list_head)" is necessarily
+	 * a power-of-two.
+	 */
+	hash_size = PAGE_SIZE / sizeof(struct list_head);
+	hash_bits = 0;
+	do {
+		hash_bits++;
+	} while ((hash_size >> hash_bits) != 0);
+	hash_bits--;	/* now the index of the highest set bit of hash_size */
+
+	/*
+	 * Re-calculate the actual number of entries and the mask
+	 * from the number of bits we can fit.
+	 */
+	hash_size = 1UL << hash_bits;
+
+	/* And initialize the newly allocated array */
+	d = dcookie_hashtable;
+	i = hash_size;
+	do {
+		INIT_LIST_HEAD(d);
+		d++;
+		i--;
+	} while (i);
+
+out:
+	return err;
+out_kmem:	/* hash table allocation failed: undo the cache creation */
+	kmem_cache_destroy(dcookie_cache);
+	goto out;
+}
+
+
+static void free_dcookie(struct dcookie_struct * dcs)	/* does not unhash dcs; dcookie_exit() does that first */
+{
+	dcs->dentry->d_cookie = NULL;	/* the dentry no longer has a cookie */
+	dput(dcs->dentry);	/* release the counts raised in alloc_dcookie() */
+	mntput(dcs->vfsmnt);
+	kmem_cache_free(dcookie_cache, dcs);
+}
+
+
+static void dcookie_exit(void)	/* tear down everything dcookie_init() and later lookups built */
+{
+	struct list_head * list;
+	struct list_head * pos;
+	struct list_head * pos2;
+	struct dcookie_struct * dcs;
+	size_t i;
+
+	for (i = 0; i < hash_size; ++i) {	/* empty every bucket */
+		list = dcookie_hashtable + i;
+		list_for_each_safe(pos, pos2, list) {	/* _safe: the current entry is removed in the body */
+			dcs = list_entry(pos, struct dcookie_struct, hash_list);
+			list_del(&dcs->hash_list);
+			free_dcookie(dcs);
+		}
+	}
+
+	kfree(dcookie_hashtable);
+	kmem_cache_destroy(dcookie_cache);
+}
+
+
+struct dcookie_user {
+	struct list_head next;	/* link in the dcookie_users list */
+};
+ 
+struct dcookie_user * dcookie_register(void)
+{
+	struct dcookie_user * user;
+
+	down(&dcookie_sem);
+
+	user = kmalloc(sizeof(struct dcookie_user), GFP_KERNEL);
+	if (!user)
+		goto out;
+
+	if (!is_live() && dcookie_init())	/* the first user sets up the cache and hash table */
+		goto out_free;
+
+	list_add(&user->next, &dcookie_users);	/* from here on is_live() is true */
+
+out:
+	up(&dcookie_sem);
+	return user;	/* NULL if the allocation or dcookie_init() failed */
+out_free:
+	kfree(user);
+	user = NULL;
+	goto out;
+}
+
+
+void dcookie_unregister(struct dcookie_user * user)
+{
+	down(&dcookie_sem);
+
+	list_del(&user->next);
+	kfree(user);	/* the handle from dcookie_register() is invalid after this */
+
+	if (!is_live())	/* that was the last user: release all cookies and tables */
+		dcookie_exit();
+
+	up(&dcookie_sem);
+}
+
+EXPORT_SYMBOL_GPL(dcookie_register);
+EXPORT_SYMBOL_GPL(dcookie_unregister);
+EXPORT_SYMBOL_GPL(get_dcookie);
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 72b3148cb038..df2fa4d8d0fd 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -490,11 +490,11 @@ static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
 }
 
 /*
- *	get_index()
+ *	read_index()
  *
  *	reads a directory table slot
  */
-static int get_index(struct inode *ip, u32 index,
+static int read_index(struct inode *ip, u32 index,
 		     struct dir_table_slot * dirtab_slot)
 {
 	struct metapage *mp = 0;
@@ -2978,7 +2978,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 				return 0;
 			}
 		      repeat:
-			rc = get_index(ip, dir_index, &dirtab_slot);
+			rc = read_index(ip, dir_index, &dirtab_slot);
 			if (rc) {
 				filp->f_pos = DIREND;
 				return rc;
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 223d700da927..7859b2f22d28 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -478,12 +478,12 @@ int readSuper(struct super_block *sb, struct buffer_head **bpp)
 {
 	/* read in primary superblock */
 	*bpp = sb_bread(sb, SUPER1_OFF >> sb->s_blocksize_bits);
-	if (bpp)
+	if (*bpp)
 		return 0;
 
 	/* read in secondary/replicated superblock */
 	*bpp = sb_bread(sb, SUPER2_OFF >> sb->s_blocksize_bits);
-	if (bpp)
+	if (*bpp)
 		return 0;
 
 	return -EIO;
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 836322c2be06..c098a522553b 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -8,6 +8,7 @@ nfs-y 			:= dir.o file.o flushd.o inode.o nfs2xdr.o pagelist.o \
 			   proc.o read.o symlink.o unlink.o write.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
+nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o
 nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
 nfs-objs		:= $(nfs-y)
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3443f647ed2f..f02b7c9c7f36 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -259,6 +259,12 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 	if (!inode)
 		return -EINVAL;
 
+	/* This will be in a forthcoming patch. */
+	if (NFS_PROTO(inode)->version == 4) {
+		printk(KERN_INFO "NFS: file locking over NFSv4 is not yet supported\n");
+		return -EIO;
+	}
+
 	/* No mandatory locks over NFS */
 	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
 		return -ENOLCK;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f7e1e442c9e7..39027f2af310 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -28,6 +28,7 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
 #include <linux/nfs_flushd.h>
 #include <linux/lockd/bind.h>
 #include <linux/smp_lock.h>
@@ -76,8 +77,13 @@ static struct rpc_version *	nfs_version[] = {
 	NULL,
 	NULL,
 	&nfs_version2,
-#ifdef CONFIG_NFS_V3
+#if defined(CONFIG_NFS_V3)
 	&nfs_version3,
+#elif defined(CONFIG_NFS_V4)
+	NULL,
+#endif
+#if defined(CONFIG_NFS_V4)
+	&nfs_version4,
 #endif
 };
 
@@ -157,6 +163,7 @@ nfs_put_super(struct super_block *sb)
 		lockd_down();	/* release rpc.lockd */
 	rpciod_down();		/* release rpciod */
 
+	destroy_nfsv4_state(server);
 	kfree(server->hostname);
 }
 
@@ -234,6 +241,120 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh)
 }
 
 /*
+ * Do NFS version-independent mount processing, and sanity checking
+ */
+int nfs_sb_init(struct super_block *sb)
+{
+	struct nfs_server	*server;
+	struct inode		*root_inode = NULL;
+	struct nfs_fattr	fattr;	/* scratch attributes shared by fsinfo and pathinfo */
+	struct nfs_fsinfo	fsinfo = {
+					.fattr = &fattr,
+				};
+	struct nfs_pathconf pathinfo = {
+			.fattr = &fattr,
+	};
+
+	/* We probably want something more informative here */
+	snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
+
+	server = NFS_SB(sb);
+
+	sb->s_magic      = NFS_SUPER_MAGIC;
+	sb->s_op         = &nfs_sops;
+	INIT_LIST_HEAD(&server->lru_read);
+	INIT_LIST_HEAD(&server->lru_dirty);
+	INIT_LIST_HEAD(&server->lru_commit);
+	INIT_LIST_HEAD(&server->lru_busy);
+
+	/* Did getting the root inode fail? */
+	root_inode = nfs_get_root(sb, &server->fh);
+	if (!root_inode)
+		goto out_no_root;
+	sb->s_root = d_alloc_root(root_inode);
+	if (!sb->s_root)
+		goto out_no_root;
+
+	sb->s_root->d_op = &nfs_dentry_operations;
+
+	/* Get some general file system info */
+        if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) {
+		printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n");
+		goto out_no_root;	/* NOTE(review): this also logs "get root inode failed" */
+        }
+	if (server->namelen == 0 &&	/* pathconf is queried only when namelen is unset */
+	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
+		server->namelen = pathinfo.max_namelen;
+	/* Work out a lot of parameters */
+	if (server->rsize == 0)		/* unset: use the preferred size from fsinfo */
+		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
+	if (server->wsize == 0)
+		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
+	if (sb->s_blocksize == 0) {	/* caller left the block size unset */
+		if (fsinfo.wtmult == 0) {
+			sb->s_blocksize = 512;
+			sb->s_blocksize_bits = 9;
+		} else
+			sb->s_blocksize = nfs_block_bits(fsinfo.wtmult,
+							 &sb->s_blocksize_bits);
+	}
+
+	/* Clamp to the reported maxima; maxima below 512 are ignored */
+	if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
+		server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
+	if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
+		server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
+
+	/* Pages per read, rounded up and capped at NFS_READ_MAXIOV */
+	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (server->rpages > NFS_READ_MAXIOV) {
+		server->rpages = NFS_READ_MAXIOV;
+		server->rsize = server->rpages << PAGE_CACHE_SHIFT;
+	}
+
+	/* Pages per write, rounded up and capped at NFS_WRITE_MAXIOV */
+	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+        if (server->wpages > NFS_WRITE_MAXIOV) {
+		server->wpages = NFS_WRITE_MAXIOV;
+                server->wsize = server->wpages << PAGE_CACHE_SHIFT;
+	}
+
+	/* dtsize never exceeds one page cache page nor rsize */
+	server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
+	if (server->dtsize > PAGE_CACHE_SIZE)
+		server->dtsize = PAGE_CACHE_SIZE;
+	if (server->dtsize > server->rsize)
+		server->dtsize = server->rsize;
+
+	if (server->flags & NFS_MOUNT_NOAC) {	/* zero all ac* timeouts, mount synchronous */
+		server->acregmin = server->acregmax = 0;
+		server->acdirmin = server->acdirmax = 0;
+		sb->s_flags |= MS_SYNCHRONOUS;
+	}
+
+	sb->s_maxbytes = fsinfo.maxfilesize;
+	if (sb->s_maxbytes > MAX_LFS_FILESIZE) 
+		sb->s_maxbytes = MAX_LFS_FILESIZE; 
+
+	/* Fire up the writeback cache */
+	if (nfs_reqlist_alloc(server) < 0) {
+		printk(KERN_NOTICE "NFS: cannot initialize writeback cache.\n");
+		goto failure_kill_reqlist;
+	}
+
+	/* We're airborne. Set the socket buffer sizes. */
+	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+	return 0;
+	/* Yargs. It didn't work out. */
+failure_kill_reqlist:
+	nfs_reqlist_exit(server);
+out_free_all:
+	if (root_inode)	/* NOTE(review): sb->s_root may already own this reference - confirm */
+		iput(root_inode);
+	nfs_reqlist_free(server);
+	return -EINVAL;	/* every failure in this function is reported as -EINVAL */
+out_no_root:
+	printk("nfs_read_super: get root inode failed\n");
+	goto out_free_all;
+}
+
+/*
  * The way this works is that the mount process passes a structure
  * in the data argument which contains the server's IP address
  * and the root file handle obtained from the server's mount
@@ -244,29 +365,20 @@ int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int sile
 	struct nfs_server	*server;
 	struct rpc_xprt		*xprt = NULL;
 	struct rpc_clnt		*clnt = NULL;
-	struct inode		*root_inode = NULL;
-	rpc_authflavor_t	authflavor;
 	struct rpc_timeout	timeparms;
-	struct nfs_fsinfo	fsinfo;
-	int			tcp, version, maxlen;
+	int			tcp, err = -EIO;
 
-	/* We probably want something more informative here */
-	snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
-
-	sb->s_magic      = NFS_SUPER_MAGIC;
-	sb->s_op         = &nfs_sops;
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize  = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
 	server           = NFS_SB(sb);
-	server->rsize    = nfs_block_size(data->rsize, NULL);
-	server->wsize    = nfs_block_size(data->wsize, NULL);
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	if (data->bsize)
+		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
+	if (data->rsize)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
 	server->flags    = data->flags & NFS_MOUNT_FLAGMASK;
 
-	if (data->flags & NFS_MOUNT_NOAC) {
-		data->acregmin = data->acregmax = 0;
-		data->acdirmin = data->acdirmax = 0;
-		sb->s_flags |= MS_SYNCHRONOUS;
-	}
 	server->acregmin = data->acregmin*HZ;
 	server->acregmax = data->acregmax*HZ;
 	server->acdirmin = data->acdirmin*HZ;
@@ -275,34 +387,26 @@ int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int sile
 	server->namelen  = data->namlen;
 	server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
 	if (!server->hostname)
-		goto out_unlock;
+		goto out_fail;
 	strcpy(server->hostname, data->hostname);
-	INIT_LIST_HEAD(&server->lru_read);
-	INIT_LIST_HEAD(&server->lru_dirty);
-	INIT_LIST_HEAD(&server->lru_commit);
-	INIT_LIST_HEAD(&server->lru_busy);
 
- nfsv3_try_again:
-	server->caps = 0;
 	/* Check NFS protocol revision and initialize RPC op vector
 	 * and file handle pool. */
-	if (data->flags & NFS_MOUNT_VER3) {
+	if (server->flags & NFS_MOUNT_VER3) {
 #ifdef CONFIG_NFS_V3
 		server->rpc_ops = &nfs_v3_clientops;
-		version = 3;
 		server->caps |= NFS_CAP_READDIRPLUS;
 		if (data->version < 4) {
 			printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
-			goto out_unlock;
+			goto out_fail;
 		}
 #else
 		printk(KERN_NOTICE "NFS: NFSv3 not supported.\n");
-		goto out_unlock;
+		goto out_fail;
 #endif
 	} else {
 		server->rpc_ops = &nfs_v2_clientops;
-		version = 2;
-        }
+	}
 
 	/* Which protocol do we use? */
 	tcp   = (data->flags & NFS_MOUNT_TCP);
@@ -321,155 +425,54 @@ int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int sile
 	/* Now create transport and client */
 	xprt = xprt_create_proto(tcp? IPPROTO_TCP : IPPROTO_UDP,
 						&server->addr, &timeparms);
-	if (xprt == NULL)
-		goto out_no_xprt;
-
-	/* Choose authentication flavor */
-	authflavor = RPC_AUTH_UNIX;
-	if (data->flags & NFS_MOUNT_SECURE)
-		authflavor = RPC_AUTH_DES;
-	else if (data->flags & NFS_MOUNT_KERBEROS)
-		authflavor = RPC_AUTH_KRB;
-
+	if (xprt == NULL) {
+		printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+		goto out_fail;
+	}
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				 version, authflavor);
-	if (clnt == NULL)
-		goto out_no_client;
+				 server->rpc_ops->version, RPC_AUTH_UNIX);
+	if (clnt == NULL) {
+		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		xprt_destroy(xprt);
+		goto out_fail;
+	}
 
-	clnt->cl_intr     = (data->flags & NFS_MOUNT_INTR)? 1 : 0;
-	clnt->cl_softrtry = (data->flags & NFS_MOUNT_SOFT)? 1 : 0;
-	clnt->cl_droppriv = (data->flags & NFS_MOUNT_BROKEN_SUID) ? 1 : 0;
+	clnt->cl_intr     = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
+	clnt->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
+	clnt->cl_droppriv = (server->flags & NFS_MOUNT_BROKEN_SUID) ? 1 : 0;
 	clnt->cl_chatty   = 1;
 	server->client    = clnt;
 
 	/* Fire up rpciod if not yet running */
-	if (rpciod_up() != 0)
-		goto out_no_iod;
-
-	/*
-	 * Keep the super block locked while we try to get 
-	 * the root fh attributes.
-	 */
-	/* Did getting the root inode fail? */
-	if (!(root_inode = nfs_get_root(sb, &server->fh))
-	    && (data->flags & NFS_MOUNT_VER3)) {
-		data->flags &= ~NFS_MOUNT_VER3;
-		rpciod_down();
-		rpc_shutdown_client(server->client);
-		goto nfsv3_try_again;
+	if (rpciod_up() != 0) {
+		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
+		goto out_shutdown;
 	}
 
-	if (!root_inode)
-		goto out_no_root;
-	sb->s_root = d_alloc_root(root_inode);
-	if (!sb->s_root)
-		goto out_no_root;
+	err = nfs_sb_init(sb);
+	if (err != 0)
+		goto out_noinit;
 
-	sb->s_root->d_op = &nfs_dentry_operations;
-
-	/* Get some general file system info */
-        if (server->rpc_ops->statfs(server, &server->fh, &fsinfo) >= 0) {
-		if (server->namelen == 0)
-			server->namelen = fsinfo.namelen;
+	if (server->flags & NFS_MOUNT_VER3) {
+		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+			server->namelen = NFS3_MAXNAMLEN;
 	} else {
-		printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n");
-		goto out_no_root;
-        }
-
-	/* Work out a lot of parameters */
-	if (data->rsize == 0)
-		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
-	if (data->wsize == 0)
-		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
-	/* NFSv3: we don't have bsize, but rather rtmult and wtmult... */
-	if (!fsinfo.bsize)
-		fsinfo.bsize = (fsinfo.rtmult>fsinfo.wtmult) ? fsinfo.rtmult : fsinfo.wtmult;
-	/* Also make sure we don't go below rsize/wsize since
-	 * RPC calls are expensive */
-	if (fsinfo.bsize < server->rsize)
-		fsinfo.bsize = server->rsize;
-	if (fsinfo.bsize < server->wsize)
-		fsinfo.bsize = server->wsize;
-
-	if (data->bsize == 0)
-		sb->s_blocksize = nfs_block_bits(fsinfo.bsize, &sb->s_blocksize_bits);
-	if (server->rsize > fsinfo.rtmax)
-		server->rsize = fsinfo.rtmax;
-	if (server->wsize > fsinfo.wtmax)
-		server->wsize = fsinfo.wtmax;
-
-	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (server->rpages > NFS_READ_MAXIOV) {
-		server->rpages = NFS_READ_MAXIOV;
-		server->rsize = server->rpages << PAGE_CACHE_SHIFT;
+		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+			server->namelen = NFS2_MAXNAMLEN;
 	}
 
-	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-        if (server->wpages > NFS_WRITE_MAXIOV) {
-		server->wpages = NFS_WRITE_MAXIOV;
-                server->wsize = server->wpages << PAGE_CACHE_SHIFT;
-	}
-
-	server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
-	if (server->dtsize > PAGE_CACHE_SIZE)
-		server->dtsize = PAGE_CACHE_SIZE;
-	if (server->dtsize > server->rsize)
-		server->dtsize = server->rsize;
-
-        maxlen = (version == 2) ? NFS2_MAXNAMLEN : NFS3_MAXNAMLEN;
-
-        if (server->namelen == 0 || server->namelen > maxlen)
-                server->namelen = maxlen;
-
-	sb->s_maxbytes = fsinfo.maxfilesize;
-	if (sb->s_maxbytes > MAX_LFS_FILESIZE) 
-		sb->s_maxbytes = MAX_LFS_FILESIZE; 
-
-	/* Fire up the writeback cache */
-	if (nfs_reqlist_alloc(server) < 0) {
-		printk(KERN_NOTICE "NFS: cannot initialize writeback cache.\n");
-		goto failure_kill_reqlist;
-	}
-
-	/* We're airborne Set socket buffersize */
-	rpc_setbufsize(clnt, server->wsize + 100, server->rsize + 100);
-
 	/* Check whether to start the lockd process */
 	if (!(server->flags & NFS_MOUNT_NONLM))
 		lockd_up();
 	return 0;
-
-	/* Yargs. It didn't work out. */
- failure_kill_reqlist:
-	nfs_reqlist_exit(server);
-out_no_root:
-	printk("nfs_read_super: get root inode failed\n");
-	iput(root_inode);
+out_noinit:
 	rpciod_down();
-	goto out_shutdown;
-
-out_no_iod:
-	printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
 out_shutdown:
 	rpc_shutdown_client(server->client);
-	goto out_free_host;
-
-out_no_client:
-	printk(KERN_WARNING "NFS: cannot create RPC client.\n");
-	xprt_destroy(xprt);
-	goto out_free_host;
-
-out_no_xprt:
-	printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
-
-out_free_host:
-	nfs_reqlist_free(server);
-	kfree(server->hostname);
-out_unlock:
-	goto out_fail;
-
 out_fail:
-	return -EINVAL;
+	if (server->hostname)
+		kfree(server->hostname);
+	return err;
 }
 
 static int
@@ -478,29 +481,30 @@ nfs_statfs(struct super_block *sb, struct statfs *buf)
 	struct nfs_server *server = NFS_SB(sb);
 	unsigned char blockbits;
 	unsigned long blockres;
-	struct nfs_fsinfo res;
+	struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
+	struct nfs_fattr fattr;
+	struct nfs_fsstat res = {
+			.fattr = &fattr,
+	};
 	int error;
 
 	lock_kernel();
 
-	error = server->rpc_ops->statfs(server, NFS_FH(sb->s_root->d_inode), &res);
+	error = server->rpc_ops->statfs(server, rootfh, &res);
 	buf->f_type = NFS_SUPER_MAGIC;
 	if (error < 0)
 		goto out_err;
 
-	if (res.bsize == 0)
-		res.bsize = sb->s_blocksize;
-	buf->f_bsize = nfs_block_bits(res.bsize, &blockbits);
+	buf->f_bsize = sb->s_blocksize;
+	blockbits = sb->s_blocksize_bits;
 	blockres = (1 << blockbits) - 1;
 	buf->f_blocks = (res.tbytes + blockres) >> blockbits;
 	buf->f_bfree = (res.fbytes + blockres) >> blockbits;
 	buf->f_bavail = (res.abytes + blockres) >> blockbits;
 	buf->f_files = res.tfiles;
 	buf->f_ffree = res.afiles;
-	if (res.namelen == 0 || res.namelen > server->namelen)
-		res.namelen = server->namelen;
-	buf->f_namelen = res.namelen;
 
+	buf->f_namelen = server->namelen;
  out:
 	unlock_kernel();
 
@@ -1286,6 +1290,239 @@ static struct file_system_type nfs_fs_type = {
 	.fs_flags	= FS_ODD_RENAME,
 };
 
+#ifdef CONFIG_NFS_V4
+
+/* Set up a new NFSv4 superblock: RPC transport/client, rpciod, v4 state. */
+static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
+{
+	struct nfs_server *server;
+	struct rpc_xprt *xprt = NULL;
+	struct rpc_clnt *clnt = NULL;
+	struct rpc_timeout timeparms;
+	rpc_authflavor_t authflavour;
+	int proto = data->proto, err = -EIO;
+
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	server = NFS_SB(sb);
+	if (data->rsize != 0)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize != 0)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+
+	/* NFSv4 doesn't use NLM locking */
+	server->flags |= NFS_MOUNT_NONLM;
+
+	server->acregmin = data->acregmin*HZ;
+	server->acregmax = data->acregmax*HZ;
+	server->acdirmin = data->acdirmin*HZ;
+	server->acdirmax = data->acdirmax*HZ;
+	server->rpc_ops = &nfs_v4_clientops;
+
+	/* Initialize timeout values */
+	timeparms.to_initval = data->timeo * HZ / 10;
+	timeparms.to_retries = data->retrans;
+	timeparms.to_exponential = 1;
+	if (!timeparms.to_retries)
+		timeparms.to_retries = 5;
+
+	/* Which IP protocol do we use? */
+	switch (proto) {
+	case IPPROTO_TCP:
+		timeparms.to_maxval  = RPC_MAX_TCP_TIMEOUT;
+		if (!timeparms.to_initval)
+			timeparms.to_initval = 600 * HZ / 10;
+		break;
+	case IPPROTO_UDP:
+		timeparms.to_maxval  = RPC_MAX_UDP_TIMEOUT;
+		if (!timeparms.to_initval)
+			timeparms.to_initval = 11 * HZ / 10;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Fetch the auth flavour first, so a fault cannot leak the transport */
+	authflavour = RPC_AUTH_UNIX;
+	if (data->auth_flavourlen != 0) {
+		if (data->auth_flavourlen > 1)
+			printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n");
+		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
+			err = -EFAULT;
+			goto out_fail;
+		}
+	}
+
+	/* Now create transport and client */
+	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
+	if (xprt == NULL) {
+		printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+		goto out_fail;
+	}
+	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
+				 server->rpc_ops->version, authflavour);
+	if (clnt == NULL) {
+		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		xprt_destroy(xprt);
+		goto out_fail;
+	}
+
+	clnt->cl_intr     = (server->flags & NFS4_MOUNT_INTR) ? 1 : 0;
+	clnt->cl_softrtry = (server->flags & NFS4_MOUNT_SOFT) ? 1 : 0;
+	clnt->cl_chatty   = 1;
+	server->client    = clnt;
+
+	/* Fire up rpciod if not yet running */
+	if (rpciod_up() != 0) {
+		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
+		goto out_shutdown;
+	}
+
+	if (create_nfsv4_state(server, data))
+		goto out_shutdown;
+
+	err = nfs_sb_init(sb);
+	if (err == 0)
+		return 0;
+	rpciod_down();
+	destroy_nfsv4_state(server);
+out_shutdown:
+	rpc_shutdown_client(server->client);
+out_fail:
+	return err;
+}
+
+static int nfs4_compare_super(struct super_block *sb, void *data)
+{
+	struct nfs_server *wanted = data;
+	struct nfs_server *existing = NFS_SB(sb);
+
+	/* Match only when both the server name and the mount path agree. */
+	if (strcmp(wanted->hostname, existing->hostname) != 0)
+		return 0;
+
+	return strcmp(wanted->mnt_path, existing->mnt_path) == 0;
+}
+
+static void *
+nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+{
+	char *allocated = NULL;
+
+	if (src->len == 0)
+		return ERR_PTR(-EINVAL);
+	if (src->len < maxlen)
+		maxlen = src->len;
+	/* Without a caller buffer, allocate the string plus room for a NUL. */
+	if (dst == NULL) {
+		dst = allocated = kmalloc(maxlen + 1, GFP_KERNEL);
+		if (allocated == NULL)
+			return ERR_PTR(-ENOMEM);
+	}
+	if (copy_from_user(dst, src->data, maxlen) != 0) {
+		kfree(allocated);	/* NULL when dst is the caller's buffer */
+		return ERR_PTR(-EFAULT);
+	}
+	dst[maxlen] = '\0';	/* a caller-supplied dst must hold maxlen + 1 bytes */
+	return dst;
+}
+
+/* get_sb for "nfs4": copy the user-supplied mount data into a fresh
+ * nfs_server, then find or create the matching superblock via sget(). */
+static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
+	int flags, char *dev_name, void *raw_data)
+{
+	int error;
+	struct nfs_server *server;
+	struct super_block *s;
+	struct nfs4_mount_data *data = raw_data;
+	void *p;
+
+	if (!data) {
+		printk("nfs_read_super: missing data argument\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+	memset(server, 0, sizeof(struct nfs_server));
+
+	if (data->version != NFS4_MOUNT_VERSION) {
+		printk("nfs warning: mount version %s than kernel\n",
+			data->version < NFS4_MOUNT_VERSION ? "older" : "newer");
+	}
+
+	p = nfs_copy_user_string(NULL, &data->hostname, 256);
+	if (IS_ERR(p))
+		goto out_err;
+	server->hostname = p;
+
+	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
+	if (IS_ERR(p))
+		goto out_err;
+	server->mnt_path = p;
+
+	p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
+			sizeof(server->ip_addr) - 1);	/* keep room for the NUL */
+	if (IS_ERR(p))
+		goto out_err;
+
+	/* We now require that the mount process passes the remote address */
+	if (data->host_addrlen != sizeof(server->addr)) {
+		s = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
+		s = ERR_PTR(-EFAULT);
+		goto out_free;
+	}
+	if (server->addr.sin_family != AF_INET ||
+	    server->addr.sin_addr.s_addr == INADDR_ANY) {
+		printk("NFS: mount program didn't pass remote IP address!\n");
+		s = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+
+	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
+
+	/* Error, or an already set-up superblock: our server copy is not needed */
+	if (IS_ERR(s) || s->s_root)
+		goto out_free;
+
+	s->s_flags = flags;
+
+	error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+	if (error) {
+		up_write(&s->s_umount);
+		deactivate_super(s);
+		return ERR_PTR(error);
+	}
+	s->s_flags |= MS_ACTIVE;
+	return s;
+out_err:
+	s = (struct super_block *)p;	/* p carries the ERR_PTR from the failed copy */
+out_free:
+	kfree(server->mnt_path);	/* NULL if never set; server was zeroed above */
+	kfree(server->hostname);
+	kfree(server);
+	return s;
+}
+
+static struct file_system_type nfs4_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "nfs4",		/* type name registered via register_nfs4fs() */
+	.get_sb		= nfs4_get_sb,		/* builds or finds the superblock for a mount */
+	.kill_sb	= nfs_kill_super,
+	.fs_flags	= FS_ODD_RENAME,	/* same flag the "nfs" type above sets */
+};
+#define register_nfs4fs() register_filesystem(&nfs4_fs_type)
+#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type)
+#else
+#define register_nfs4fs() (0)
+#define unregister_nfs4fs()
+#endif
+
 extern int nfs_init_nfspagecache(void);
 extern void nfs_destroy_nfspagecache(void);
 extern int nfs_init_readpagecache(void);
@@ -1377,6 +1614,8 @@ static int __init init_nfs_fs(void)
         err = register_filesystem(&nfs_fs_type);
 	if (err)
 		goto out;
+	if ((err = register_nfs4fs()) != 0)
+		goto out;
 	return 0;
 out:
 	rpc_proc_unregister("nfs");
@@ -1401,6 +1640,7 @@ static void __exit exit_nfs_fs(void)
 	rpc_proc_unregister("nfs");
 #endif
 	unregister_filesystem(&nfs_fs_type);
+	unregister_nfs4fs();
 }
 
 /* Not quite true; I just maintain it */
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 8dc92b8b3a1c..8e652afdfea4 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -596,37 +596,18 @@ nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
  * Decode STATFS reply
  */
 static int
-nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_fsstat *res)
 {
 	int	status;
-	u32	xfer_size;
 
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
 
-	/* For NFSv2, we more or less have to guess the preferred
-	 * read/write/readdir sizes from the single 'transfer size'
-	 * value.
-	 */
-	xfer_size = ntohl(*p++);	/* tsize */
-	res->rtmax  = 8 * 1024;
-	res->rtpref = xfer_size;
-	res->rtmult = xfer_size;
-	res->wtmax  = 8 * 1024;
-	res->wtpref = xfer_size;
-	res->wtmult = xfer_size;
-	res->dtpref = PAGE_CACHE_SIZE;
-	res->maxfilesize = 0x7FFFFFFF;	/* just a guess */
+	res->tsize  = ntohl(*p++);
 	res->bsize  = ntohl(*p++);
-
-	res->tbytes = ntohl(*p++) * res->bsize;
-	res->fbytes = ntohl(*p++) * res->bsize;
-	res->abytes = ntohl(*p++) * res->bsize;
-	res->tfiles = 0;
-	res->ffiles = 0;
-	res->afiles = 0;
-	res->namelen = 0;
-
+	res->blocks = ntohl(*p++);
+	res->bfree  = ntohl(*p++);
+	res->bavail = ntohl(*p++);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 1ddb51374cba..790c27ead44f 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -639,24 +639,42 @@ nfs3_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr,
 	return status;
 }
 
-/*
- * This is a combo call of fsstat and fsinfo
- */
 static int
 nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
-		 struct nfs_fsinfo *info)
+		 struct nfs_fsstat *stat)
 {
 	int	status;
 
 	dprintk("NFS call  fsstat\n");
-	memset((char *)info, 0, sizeof(*info));
-	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, info, 0);
-	if (status < 0)
-		goto error;
+	stat->fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+	dprintk("NFS reply statfs: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsinfo *info)
+{
+	int	status;
+
+	dprintk("NFS call  fsinfo\n");
+	info->fattr->valid = 0;
 	status = rpc_call(server->client, NFS3PROC_FSINFO, fhandle, info, 0);
+	dprintk("NFS reply fsinfo: %d\n", status);
+	return status;
+}
 
-error:
-	dprintk("NFS reply statfs: %d\n", status);
+static int
+nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_pathconf *info)
+{
+	int	status;
+
+	dprintk("NFS call  pathconf\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+	dprintk("NFS reply pathconf: %d\n", status);
 	return status;
 }
 
@@ -824,6 +842,8 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.readdir	= nfs3_proc_readdir,
 	.mknod		= nfs3_proc_mknod,
 	.statfs		= nfs3_proc_statfs,
+	.fsinfo		= nfs3_proc_fsinfo,
+	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
 	.read_setup	= nfs3_proc_read_setup,
 	.write_setup	= nfs3_proc_write_setup,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b0c77b19fff9..2a813fb65365 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -912,14 +912,13 @@ nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res)
  * Decode FSSTAT reply
  */
 static int
-nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res)
 {
-	struct nfs_fattr dummy;
 	int		status;
 
 	status = ntohl(*p++);
 
-	p = xdr_decode_post_op_attr(p, &dummy);
+	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
 		return -nfs_stat_to_errno(status);
 
@@ -940,12 +939,11 @@ nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
 static int
 nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
 {
-	struct nfs_fattr dummy;
 	int		status;
 
 	status = ntohl(*p++);
 
-	p = xdr_decode_post_op_attr(p, &dummy);
+	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
 		return -nfs_stat_to_errno(status);
 
@@ -959,6 +957,7 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
 	p = xdr_decode_hyper(p, &res->maxfilesize);
 
 	/* ignore time_delta and properties */
+	res->lease_time = 0;
 	return 0;
 }
 
@@ -966,18 +965,17 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
  * Decode PATHCONF reply
  */
 static int
-nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res)
 {
-	struct nfs_fattr dummy;
 	int		status;
 
 	status = ntohl(*p++);
 
-	p = xdr_decode_post_op_attr(p, &dummy);
+	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
 		return -nfs_stat_to_errno(status);
-	res->linkmax = ntohl(*p++);
-	res->namelen = ntohl(*p++);
+	res->max_link = ntohl(*p++);
+	res->max_namelen = ntohl(*p++);
 
 	/* ignore remaining fields */
 	return 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
new file mode 100644
index 000000000000..8608fd9b3a30
--- /dev/null
+++ b/fs/nfs/nfs4proc.c
@@ -0,0 +1,1577 @@
+/*
+ *  fs/nfs/nfs4proc.c
+ *
+ *  Client-side procedure declarations for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/smp_lock.h>
+
+#define NFSDBG_FACILITY		NFSDBG_PROC
+
+#define GET_OP(cp,name)		(&(cp)->ops[(cp)->req_nops].u.name)	/* arg slot of the op being added */
+#define OPNUM(cp)		((cp)->ops[(cp)->req_nops].opnum)	/* opcode of the op being added */
+
+extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+
+static nfs4_stateid zero_stateid =
+  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+static spinlock_t renew_lock = SPIN_LOCK_UNLOCKED;
+
+static void
+nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops,
+		    struct nfs_server *server, char *tag)
+{
+	memset(cp, 0, sizeof(*cp));
+	cp->ops = ops;
+	cp->server = server;
+
+#if NFS4_DEBUG
+	cp->taglen = strlen(tag);
+	cp->tag = tag;
+#endif
+}
+
+static void
+nfs4_setup_access(struct nfs4_compound *cp, u32 req_access, u32 *resp_supported, u32 *resp_access)
+{
+	struct nfs4_access *access = GET_OP(cp, access);
+	
+	access->ac_req_access = req_access;
+	access->ac_resp_supported = resp_supported;
+	access->ac_resp_access = resp_access;
+	
+	OPNUM(cp) = OP_ACCESS;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_close(struct nfs4_compound *cp, nfs4_stateid stateid, u32 seqid)
+{
+	struct nfs4_close *close = GET_OP(cp, close);
+
+	close->cl_stateid = stateid;
+	close->cl_seqid = seqid;
+
+	OPNUM(cp) = OP_CLOSE;
+	cp->req_nops++;
+	cp->renew_index = cp->req_nops;
+}
+
+static void
+nfs4_setup_commit(struct nfs4_compound *cp, u64 start, u32 len, struct nfs_writeverf *verf)
+{
+	struct nfs4_commit *commit = GET_OP(cp, commit);
+
+	commit->co_start = start;
+	commit->co_len = len;
+	commit->co_verifier = verf;
+
+	OPNUM(cp) = OP_COMMIT;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_create_dir(struct nfs4_compound *cp, struct qstr *name,
+		      struct iattr *sattr, struct nfs4_change_info *info)
+{
+	struct nfs4_create *create = GET_OP(cp, create);
+	
+	create->cr_ftype = NF4DIR;
+	create->cr_namelen = name->len;
+	create->cr_name = name->name;
+	create->cr_attrs = sattr;
+	create->cr_cinfo = info;
+	
+	OPNUM(cp) = OP_CREATE;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_create_symlink(struct nfs4_compound *cp, struct qstr *name,
+			  struct qstr *linktext, struct iattr *sattr,
+			  struct nfs4_change_info *info)
+{
+	struct nfs4_create *create = GET_OP(cp, create);
+
+	create->cr_ftype = NF4LNK;
+	create->cr_textlen = linktext->len;
+	create->cr_text = linktext->name;
+	create->cr_namelen = name->len;
+	create->cr_name = name->name;
+	create->cr_attrs = sattr;
+	create->cr_cinfo = info;
+
+	OPNUM(cp) = OP_CREATE;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_create_special(struct nfs4_compound *cp, struct qstr *name,
+			    dev_t dev, struct iattr *sattr,
+			    struct nfs4_change_info *info)
+{
+	int mode = sattr->ia_mode;
+	struct nfs4_create *create = GET_OP(cp, create);
+
+	BUG_ON(!(sattr->ia_valid & ATTR_MODE));
+	BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
+	
+	if (S_ISFIFO(mode))
+		create->cr_ftype = NF4FIFO;
+	else if (S_ISBLK(mode)) {
+		create->cr_ftype = NF4BLK;
+		create->cr_specdata1 = MAJOR(dev);
+		create->cr_specdata2 = MINOR(dev);
+	}
+	else if (S_ISCHR(mode)) {
+		create->cr_ftype = NF4CHR;
+		create->cr_specdata1 = MAJOR(dev);
+		create->cr_specdata2 = MINOR(dev);
+	}
+	else
+		create->cr_ftype = NF4SOCK;
+	
+	create->cr_namelen = name->len;
+	create->cr_name = name->name;
+	create->cr_attrs = sattr;
+	create->cr_cinfo = info;
+
+	OPNUM(cp) = OP_CREATE;
+	cp->req_nops++;
+}
+
+/*
+ * This is our standard bitmap for GETATTR requests.
+ */
+u32 nfs4_fattr_bitmap[2] = {
+	FATTR4_WORD0_TYPE
+	| FATTR4_WORD0_CHANGE
+	| FATTR4_WORD0_SIZE
+	| FATTR4_WORD0_FSID
+	| FATTR4_WORD0_FILEID,
+	FATTR4_WORD1_MODE
+	| FATTR4_WORD1_NUMLINKS
+	| FATTR4_WORD1_OWNER
+	| FATTR4_WORD1_OWNER_GROUP
+	| FATTR4_WORD1_RAWDEV
+	| FATTR4_WORD1_SPACE_USED
+	| FATTR4_WORD1_TIME_ACCESS
+	| FATTR4_WORD1_TIME_METADATA
+	| FATTR4_WORD1_TIME_MODIFY
+};
+
+u32 nfs4_statfs_bitmap[2] = {
+	FATTR4_WORD0_FILES_AVAIL
+	| FATTR4_WORD0_FILES_FREE
+	| FATTR4_WORD0_FILES_TOTAL,
+	FATTR4_WORD1_SPACE_AVAIL
+	| FATTR4_WORD1_SPACE_FREE
+	| FATTR4_WORD1_SPACE_TOTAL
+};
+
+u32 nfs4_fsinfo_bitmap[2] = {
+	FATTR4_WORD0_MAXFILESIZE
+	| FATTR4_WORD0_MAXREAD
+        | FATTR4_WORD0_MAXWRITE
+	| FATTR4_WORD0_LEASE_TIME,
+	0
+};
+
+u32 nfs4_pathconf_bitmap[2] = {
+	FATTR4_WORD0_MAXLINK
+	| FATTR4_WORD0_MAXNAME,
+	0
+};
+
+/* mount bitmap: fattr bitmap + lease time */
+u32 nfs4_mount_bitmap[2] = {
+	FATTR4_WORD0_TYPE
+	| FATTR4_WORD0_CHANGE
+	| FATTR4_WORD0_SIZE
+	| FATTR4_WORD0_FSID
+	| FATTR4_WORD0_FILEID
+	| FATTR4_WORD0_LEASE_TIME,
+	FATTR4_WORD1_MODE
+	| FATTR4_WORD1_NUMLINKS
+	| FATTR4_WORD1_OWNER
+	| FATTR4_WORD1_OWNER_GROUP
+	| FATTR4_WORD1_RAWDEV
+	| FATTR4_WORD1_SPACE_USED
+	| FATTR4_WORD1_TIME_ACCESS
+	| FATTR4_WORD1_TIME_METADATA
+	| FATTR4_WORD1_TIME_MODIFY
+};
+
+static inline void
+__nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap,
+		     struct nfs_fattr *fattr,
+		     struct nfs_fsstat *fsstat,
+		     struct nfs_fsinfo *fsinfo,
+		     struct nfs_pathconf *pathconf,
+		     u32 *bmres)
+{
+        struct nfs4_getattr *getattr = GET_OP(cp, getattr);
+
+        getattr->gt_bmval = bitmap;
+        getattr->gt_attrs = fattr;
+	getattr->gt_fsstat = fsstat;
+	getattr->gt_fsinfo = fsinfo;
+	getattr->gt_pathconf = pathconf;
+	getattr->gt_bmres = bmres;
+
+        OPNUM(cp) = OP_GETATTR;
+        cp->req_nops++;
+}
+
+static void
+nfs4_setup_getattr(struct nfs4_compound *cp,
+		struct nfs_fattr *fattr,
+		u32 *bmres)
+{
+	__nfs4_setup_getattr(cp, nfs4_fattr_bitmap, fattr,
+			NULL, NULL, NULL, bmres);
+}
+
+static void
+nfs4_setup_getrootattr(struct nfs4_compound *cp,
+		struct nfs_fattr *fattr,
+		struct nfs_fsinfo *fsinfo,
+		u32 *bmres)
+{
+	__nfs4_setup_getattr(cp, nfs4_mount_bitmap,
+			fattr, NULL, fsinfo, NULL, bmres);
+}
+
+static void
+nfs4_setup_statfs(struct nfs4_compound *cp,
+		struct nfs_fsstat *fsstat,
+		u32 *bmres)
+{
+	__nfs4_setup_getattr(cp, nfs4_statfs_bitmap,
+			NULL, fsstat, NULL, NULL, bmres);
+}
+
+static void
+nfs4_setup_fsinfo(struct nfs4_compound *cp,
+		struct nfs_fsinfo *fsinfo,
+		u32 *bmres)
+{
+	__nfs4_setup_getattr(cp, nfs4_fsinfo_bitmap,
+			NULL, NULL, fsinfo, NULL, bmres);
+}
+
+static void
+nfs4_setup_pathconf(struct nfs4_compound *cp,
+		struct nfs_pathconf *pathconf,
+		u32 *bmres)
+{
+	__nfs4_setup_getattr(cp, nfs4_pathconf_bitmap,
+			NULL, NULL, NULL, pathconf, bmres);
+}
+
+static void
+nfs4_setup_getfh(struct nfs4_compound *cp, struct nfs_fh *fhandle)
+{
+	struct nfs4_getfh *getfh = GET_OP(cp, getfh);
+
+	getfh->gf_fhandle = fhandle;
+
+	OPNUM(cp) = OP_GETFH;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_link(struct nfs4_compound *cp, struct qstr *name,
+		struct nfs4_change_info *info)
+{
+	struct nfs4_link *link = GET_OP(cp, link);
+
+	link->ln_namelen = name->len;
+	link->ln_name = name->name;
+	link->ln_cinfo = info;
+
+	OPNUM(cp) = OP_LINK;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_lookup(struct nfs4_compound *cp, struct qstr *q)
+{
+	struct nfs4_lookup *lookup = GET_OP(cp, lookup);
+
+	lookup->lo_name = q;
+
+	OPNUM(cp) = OP_LOOKUP;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_putfh(struct nfs4_compound *cp, struct nfs_fh *fhandle)
+{
+	struct nfs4_putfh *putfh = GET_OP(cp, putfh);
+
+	putfh->pf_fhandle = fhandle;
+
+	OPNUM(cp) = OP_PUTFH;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_putrootfh(struct nfs4_compound *cp)
+{
+        OPNUM(cp) = OP_PUTROOTFH;
+        cp->req_nops++;
+}
+
+static void
+nfs4_setup_open(struct nfs4_compound *cp, int flags, struct qstr *name,
+		struct iattr *sattr, char *stateid, struct nfs4_change_info *cinfo,
+		u32 *rflags)
+{
+	struct nfs4_open *open = GET_OP(cp, open);
+
+	BUG_ON(cp->flags);
+	
+	open->op_share_access = flags & 3;
+	open->op_opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE;
+	open->op_createmode = NFS4_CREATE_UNCHECKED;
+	open->op_attrs = sattr;
+	if (flags & O_EXCL) {
+		u32 *p = (u32 *) open->op_verifier;
+		p[0] = jiffies;
+		p[1] = current->pid;
+		open->op_createmode = NFS4_CREATE_EXCLUSIVE;
+	}
+	open->op_name = name;
+	open->op_stateid = stateid;
+	open->op_cinfo = cinfo;
+	open->op_rflags = rflags;
+
+	OPNUM(cp) = OP_OPEN;
+	cp->req_nops++;
+	cp->renew_index = cp->req_nops;
+}
+
+static void
+nfs4_setup_open_confirm(struct nfs4_compound *cp, char *stateid)
+{
+	struct nfs4_open_confirm *open_confirm = GET_OP(cp, open_confirm);
+	
+	open_confirm->oc_stateid = stateid;
+
+	OPNUM(cp) = OP_OPEN_CONFIRM;
+	cp->req_nops++;
+	cp->renew_index = cp->req_nops;
+}
+
+static void
+nfs4_setup_read(struct nfs4_compound *cp, u64 offset, u32 length,
+		struct page **pages, unsigned int pgbase, u32 *eofp, u32 *bytes_read)
+{
+	struct nfs4_read *read = GET_OP(cp, read);
+
+	read->rd_offset = offset;
+	read->rd_length = length;
+	read->rd_pages = pages;
+	read->rd_pgbase = pgbase;
+	read->rd_eof = eofp;
+	read->rd_bytes_read = bytes_read;
+
+	OPNUM(cp) = OP_READ;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_readdir(struct nfs4_compound *cp, u64 cookie, u32 *verifier,
+		     struct page **pages, unsigned int bufsize, struct dentry *dentry)
+{
+	u32 *start, *p;
+	struct nfs4_readdir *readdir = GET_OP(cp, readdir);
+
+	BUG_ON(bufsize < 80);
+	readdir->rd_cookie = (cookie > 2) ? cookie : 0;
+	memcpy(readdir->rd_req_verifier, verifier, sizeof(nfs4_verifier));
+	readdir->rd_count = bufsize;
+	readdir->rd_bmval[0] = FATTR4_WORD0_FILEID;
+	readdir->rd_bmval[1] = 0;
+	readdir->rd_pages = pages;
+	readdir->rd_pgbase = 0;
+	
+	OPNUM(cp) = OP_READDIR;
+	cp->req_nops++;
+
+	if (cookie >= 2)
+		return;
+	
+	/*
+	 * NFSv4 servers do not return entries for '.' and '..'
+	 * Therefore, we fake these entries here.  We let '.'
+	 * have cookie 0 and '..' have cookie 1.  Note that
+	 * when talking to the server, we always send cookie 0
+	 * instead of 1 or 2.
+	 */
+	start = p = (u32 *)kmap(*pages);
+	
+	if (cookie == 0) {
+		*p++ = xdr_one;                                  /* next */
+		*p++ = xdr_zero;                   /* cookie, first word */
+		*p++ = xdr_one;                   /* cookie, second word */
+		*p++ = xdr_one;                             /* entry len */
+		memcpy(p, ".\0\0\0", 4);                        /* entry */
+		p++;
+		*p++ = xdr_one;                         /* bitmap length */
+		*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
+		*p++ = htonl(8);              /* attribute buffer length */
+		p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode));
+	}
+	
+	*p++ = xdr_one;                                  /* next */
+	*p++ = xdr_zero;                   /* cookie, first word */
+	*p++ = xdr_two;                   /* cookie, second word */
+	*p++ = xdr_two;                             /* entry len */
+	memcpy(p, "..\0\0", 4);                         /* entry */
+	p++;
+	*p++ = xdr_one;                         /* bitmap length */
+	*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
+	*p++ = htonl(8);              /* attribute buffer length */
+	p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode));
+
+	readdir->rd_pgbase = (char *)p - (char *)start;
+	readdir->rd_count -= readdir->rd_pgbase;
+	kunmap(*pages);
+}
+
+static void
+nfs4_setup_readlink(struct nfs4_compound *cp, int count, struct page **pages)
+{
+	struct nfs4_readlink *readlink = GET_OP(cp, readlink);
+
+	readlink->rl_count = count;
+	readlink->rl_pages = pages;
+
+	OPNUM(cp) = OP_READLINK;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_remove(struct nfs4_compound *cp, struct qstr *name, struct nfs4_change_info *cinfo)
+{
+	struct nfs4_remove *remove = GET_OP(cp, remove);
+
+	remove->rm_namelen = name->len;
+	remove->rm_name = name->name;
+	remove->rm_cinfo = cinfo;
+
+	OPNUM(cp) = OP_REMOVE;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_rename(struct nfs4_compound *cp, struct qstr *old, struct qstr *new,
+		  struct nfs4_change_info *old_cinfo, struct nfs4_change_info *new_cinfo)
+{
+	struct nfs4_rename *rename = GET_OP(cp, rename);
+
+	rename->rn_oldnamelen = old->len;
+	rename->rn_oldname = old->name;
+	rename->rn_newnamelen = new->len;
+	rename->rn_newname = new->name;
+	rename->rn_src_cinfo = old_cinfo;
+	rename->rn_dst_cinfo = new_cinfo;
+
+	OPNUM(cp) = OP_RENAME;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_renew(struct nfs4_compound *cp)
+{
+	OPNUM(cp) = OP_RENEW;
+	cp->req_nops++;
+	cp->renew_index = cp->req_nops;
+}
+
+static void
+nfs4_setup_restorefh(struct nfs4_compound *cp)
+{
+        OPNUM(cp) = OP_RESTOREFH;
+        cp->req_nops++;
+}
+
+static void
+nfs4_setup_savefh(struct nfs4_compound *cp)
+{
+        OPNUM(cp) = OP_SAVEFH;
+        cp->req_nops++;
+}
+
+static void
+nfs4_setup_setattr(struct nfs4_compound *cp, char *stateid, struct iattr *iap)
+{
+	struct nfs4_setattr *setattr = GET_OP(cp, setattr);
+
+	setattr->st_stateid = stateid;
+	setattr->st_iap = iap;
+	
+	OPNUM(cp) = OP_SETATTR;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_setclientid(struct nfs4_compound *cp, u32 program, unsigned short port)
+{
+	struct nfs4_setclientid *setclientid = GET_OP(cp, setclientid);
+	struct nfs_server *server = cp->server;
+	struct timeval tv;
+	u32 *p;
+
+	do_gettimeofday(&tv);
+	p = (u32 *)setclientid->sc_verifier;
+	*p++ = tv.tv_sec;
+	*p++ = tv.tv_usec;
+	setclientid->sc_name = server->ip_addr;
+	sprintf(setclientid->sc_netid, "udp");
+	sprintf(setclientid->sc_uaddr, "%s.%d.%d", server->ip_addr, port >> 8, port & 255);
+	setclientid->sc_prog = program;
+	setclientid->sc_cb_ident = 0;
+	
+	OPNUM(cp) = OP_SETCLIENTID;
+	cp->req_nops++;
+}
+
+static void
+nfs4_setup_setclientid_confirm(struct nfs4_compound *cp)
+{
+	OPNUM(cp) = OP_SETCLIENTID_CONFIRM;
+	cp->req_nops++;
+	cp->renew_index = cp->req_nops;
+}
+
+static void
+nfs4_setup_write(struct nfs4_compound *cp, u64 offset, u32 length, int stable,
+		 struct page **pages, unsigned int pgbase, u32 *bytes_written,
+		 struct nfs_writeverf *verf)
+{
+	struct nfs4_write *write = GET_OP(cp, write);
+
+	write->wr_offset = offset;
+	write->wr_stable_how = stable;
+	write->wr_len = length;
+	write->wr_bytes_written = bytes_written;
+	write->wr_verf = verf;
+
+	write->wr_pages = pages;
+	write->wr_pgbase = pgbase;
+
+	OPNUM(cp) = OP_WRITE;
+	cp->req_nops++;
+}
+
+static inline void
+process_lease(struct nfs4_compound *cp)
+{
+	struct nfs_server *server;
+	
+        /*
+         * Generic lease processing: If this operation contains a
+	 * lease-renewing operation, and it succeeded, update the RENEW time
+	 * in the superblock.  Instead of the current time, we use the time
+	 * when the request was sent out.  (All we know is that the lease was
+	 * renewed sometime between then and now, and we have to assume the
+	 * worst case.)
+	 *
+	 * Notes:
+	 *   (1) renewd doesn't acquire the spinlock when messing with
+	 *     server->last_renewal; this is OK since rpciod always runs
+	 *     under the BKL.
+	 *   (2) cp->timestamp was set at the end of XDR encode.
+         */
+	if (!cp->renew_index)
+		return;
+	if (!cp->toplevel_status || cp->resp_nops > cp->renew_index) {
+		server = cp->server;
+		spin_lock(&renew_lock);
+		if (server->last_renewal < cp->timestamp)
+			server->last_renewal = cp->timestamp;
+		spin_unlock(&renew_lock);
+	}
+}
+
+static int
+nfs4_call_compound(struct nfs4_compound *cp, struct rpc_cred *cred, int flags)
+{
+	int status;
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = cred,
+	};
+
+	status = rpc_call_sync(cp->server->client, &msg, flags);
+	if (!status)
+		process_lease(cp);
+	
+	return status;
+}
+
+static inline void
+process_cinfo(struct nfs4_change_info *info, struct nfs_fattr *fattr)
+{
+	BUG_ON((fattr->valid & NFS_ATTR_FATTR) == 0);
+	BUG_ON((fattr->valid & NFS_ATTR_FATTR_V4) == 0);
+	
+	if (fattr->change_attr == info->after) {
+		fattr->pre_change_attr = info->before;
+		fattr->valid |= NFS_ATTR_PRE_CHANGE;
+		fattr->timestamp = jiffies;
+	}
+}
+
+static int
+do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr,
+	struct nfs_fattr *fattr, struct nfs_fh *fhandle, u32 *seqid, char *stateid)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2];
+	u32			bmres[2];
+	u32			rflags;
+	int			status;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "open");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_open(&compound, flags, name, sattr, stateid, &dir_cinfo, &rflags);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	if ((status = nfs4_call_compound(&compound, NULL, 0)))
+		return status;
+
+	process_cinfo(&dir_cinfo, &dir_attr);
+	nfs_refresh_inode(dir, &dir_attr);
+	if (!(rflags & NFS4_OPEN_RESULT_CONFIRM)) {
+		*seqid = 1;
+		return 0;
+	}
+	*seqid = 2;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "open_confirm");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_open_confirm(&compound, stateid);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
+	   struct nfs_fh *fhandle, struct iattr *sattr, char *stateid)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	u32			bmres[2];
+
+	fattr->valid = 0;
+	nfs4_setup_compound(&compound, ops, server, "setattr");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_setattr(&compound, stateid, sattr);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+do_close(struct nfs_server *server, struct nfs_fh *fhandle, u32 seqid, char *stateid)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	
+	nfs4_setup_compound(&compound, ops, server, "close");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_close(&compound, stateid, seqid);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[4];
+	struct nfs_fsinfo	fsinfo;
+	u32			bmres[2];
+	unsigned char *		p;
+	struct qstr		q;
+	int			status;
+
+	fattr->valid = 0;
+
+	if (!(server->nfs4_state = nfs4_get_client()))
+		return -ENOMEM;
+
+	/* 
+	 * SETCLIENTID.
+	 * Until delegations are imported, we don't bother setting the program
+	 * number and port to anything meaningful.
+	 */
+	nfs4_setup_compound(&compound, ops, server, "setclientid");
+	nfs4_setup_setclientid(&compound, 0, 0);
+	if ((status = nfs4_call_compound(&compound, NULL, 0)))
+		goto out;
+
+	/*
+	 * SETCLIENTID_CONFIRM, plus root filehandle.
+	 * We also get the lease time here.
+	 */
+	nfs4_setup_compound(&compound, ops, server, "setclientid_confirm");
+	nfs4_setup_setclientid_confirm(&compound);
+	nfs4_setup_putrootfh(&compound);
+	nfs4_setup_getrootattr(&compound, fattr, &fsinfo, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	if ((status = nfs4_call_compound(&compound, NULL, 0)))
+		goto out;
+	
+	/*
+	 * Now that we have instantiated the clientid and determined
+	 * the lease time, we can initialize the renew daemon for this
+	 * server.
+	 */
+	server->lease_time = fsinfo.lease_time * HZ;
+	if ((status = nfs4_init_renewd(server)))
+		goto out;
+	
+	/*
+	 * Now we do a separate LOOKUP for each component of the mount path.
+	 * The LOOKUPs are done separately so that we can conveniently
+	 * catch an ERR_WRONGSEC if it occurs along the way...
+	 */
+	p = server->mnt_path;
+	for (;;) {
+		while (*p == '/')
+			p++;
+		if (!*p)
+			break;
+		q.name = p;
+		while (*p && (*p != '/'))
+			p++;
+		q.len = p - q.name;
+
+		nfs4_setup_compound(&compound, ops, server, "mount");
+		nfs4_setup_putfh(&compound, fhandle);
+		nfs4_setup_lookup(&compound, &q);
+		nfs4_setup_getattr(&compound, fattr, bmres);
+		nfs4_setup_getfh(&compound, fhandle);
+		status = nfs4_call_compound(&compound, NULL, 0);
+		if (!status)
+			continue;
+		if (status == -ENOENT) {
+			printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
+			printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
+		}
+		break;
+	}
+
+out:
+	return status;
+}
+
+static int
+nfs4_proc_getattr(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound compound;
+	struct nfs4_op ops[2];
+	u32 bmres[2];
+
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "getattr");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+/* Set attributes; a size change is bracketed by OPEN/CLOSE to obtain a stateid. */
+static int
+nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+		  struct iattr *sattr)
+{
+	struct inode *		inode = dentry->d_inode;
+	int			size_change = sattr->ia_valid & ATTR_SIZE;
+	struct nfs_fh		throwaway_fh;
+	u32			seqid;
+	nfs4_stateid		stateid;
+	int			status;
+
+	fattr->valid = 0;
+	if (size_change) {
+		status = do_open(dentry->d_parent->d_inode, &dentry->d_name,
+				 NFS4_SHARE_ACCESS_WRITE, NULL, fattr,
+				 &throwaway_fh, &seqid, stateid);
+		if (status)
+			return status;
+
+		/*
+		 * OPEN is always done by name in nfsv4, so we may have opened
+		 * a different file by the same name.  We can detect this race
+		 * but can only fail -- after closing the stray open through
+		 * the filehandle OPEN actually returned (throwaway_fh).
+		 *
+		 * XXX: Should we compare filehandles too, as in
+		 * nfs_find_actor()?
+		 */
+		if (fattr->fileid != NFS_FILEID(inode)) {
+			printk(KERN_WARNING "nfs: raced in setattr, returning -EIO\n");
+			do_close(NFS_SERVER(inode), &throwaway_fh, seqid, stateid);
+			return -EIO;
+		}
+	}
+	else
+		memcpy(stateid, zero_stateid, sizeof(nfs4_stateid));
+	
+	status = do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, stateid);
+	if (size_change)
+		do_close(NFS_SERVER(inode), NFS_FH(inode), seqid, stateid);
+	return status;
+}
+
+static int
+nfs4_proc_lookup(struct inode *dir, struct qstr *name,
+		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[5];
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2];
+	u32			bmres[2];
+	int			status;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	
+	dprintk("NFS call  lookup %s\n", name->name);
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "lookup");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	nfs4_setup_lookup(&compound, name);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	status = nfs4_call_compound(&compound, NULL, 0);
+	dprintk("NFS reply lookup: %d\n", status);
+
+	if (status >= 0)
+		status = nfs_refresh_inode(dir, &dir_attr);
+	return status;
+}
+
+static int
+nfs4_proc_access(struct inode *inode, struct rpc_cred *cred, int mode)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	struct nfs_fattr	fattr;
+	u32			bmres[2];
+	u32			req_access = 0, resp_supported, resp_access;
+	int			status;
+
+	fattr.valid = 0;
+
+	/*
+	 * Determine which access bits we want to ask for...
+	 */
+	if (mode & MAY_READ)
+		req_access |= NFS4_ACCESS_READ;
+	if (S_ISDIR(inode->i_mode)) {
+		if (mode & MAY_WRITE)
+			req_access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE;
+		if (mode & MAY_EXEC)
+			req_access |= NFS4_ACCESS_LOOKUP;
+	}
+	else {
+		if (mode & MAY_WRITE)
+			req_access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND;
+		if (mode & MAY_EXEC)
+			req_access |= NFS4_ACCESS_EXECUTE;
+	}
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "access");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_getattr(&compound, &fattr, bmres);
+	nfs4_setup_access(&compound, req_access, &resp_supported, &resp_access);
+	status = nfs4_call_compound(&compound, cred, 0);
+	nfs_refresh_inode(inode, &fattr);
+
+	if (!status) {
+		if (req_access != resp_supported) {
+			printk(KERN_NOTICE "NFS: server didn't support all access bits!\n");
+			status = -ENOTSUPP;
+		}
+		else if (req_access != resp_access)
+			status = -EACCES;
+	}
+	return status;
+}
+
+/*
+ * TODO: For the time being, we don't try to get any attributes
+ * along with any of the zero-copy operations READ, READDIR,
+ * READLINK, WRITE.
+ *
+ * In the case of the first three, we want to put the GETATTR
+ * after the read-type operation -- this is because it is hard
+ * to predict the length of a GETATTR response in v4, and thus
+ * align the READ data correctly.  This means that the GETATTR
+ * may end up partially falling into the page cache, and we should
+ * shift it into the 'tail' of the xdr_buf before processing.
+ * To do this efficiently, we need to know the total length
+ * of data received, which doesn't seem to be available outside
+ * of the RPC layer.
+ *
+ * In the case of WRITE, we also want to put the GETATTR after
+ * the operation -- in this case because we want to make sure
+ * we get the post-operation mtime and size.  This means that
+ * we can't use xdr_encode_pages() as written: we need a variant
+ * of it which would leave room in the 'tail' iovec.
+ *
+ * Both of these changes to the XDR layer would in fact be quite
+ * minor, but I decided to leave them for a subsequent patch.
+ */
+static int
+nfs4_proc_readlink(struct inode *inode, struct page *page)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "readlink");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_readlink(&compound, PAGE_CACHE_SIZE, &page);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+static int
+nfs4_proc_read(struct inode *inode, struct rpc_cred *cred,
+	       struct nfs_fattr *fattr, int flags,
+	       unsigned int base, unsigned int count,
+	       struct page *page, int *eofp)
+{
+	u64			offset = page_offset(page) + base;
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	u32			bytes_read;
+	int			status;
+
+	fattr->valid = 0;
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "read [sync]");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_read(&compound, offset, count, &page, base, eofp, &bytes_read);
+	status = nfs4_call_compound(&compound, cred, 0);
+
+	if (status >= 0)
+		status = bytes_read;
+	return status;
+}
+
+static int
+nfs4_proc_write(struct inode *inode, struct rpc_cred *cred,
+		struct nfs_fattr *fattr, int flags,
+		unsigned int base, unsigned int count,
+		struct page *page, struct nfs_writeverf *verf)
+{
+	u64			offset = page_offset(page) + base;
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	u32			bytes_written;
+	int			stable = (flags & NFS_RW_SYNC) ? NFS_FILE_SYNC : NFS_UNSTABLE;
+	int			rpcflags = (flags & NFS_RW_SWAP) ? NFS_RPC_SWAPFLAGS : 0;
+	int			status;
+
+	fattr->valid = 0;
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "write [sync]");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_write(&compound, offset, count, stable, &page, base, &bytes_written, verf);
+	status = nfs4_call_compound(&compound, cred, rpcflags);
+	
+	if (status >= 0)
+		status = bytes_written;
+	return status;
+}
+
+static int
+nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
+		 int flags, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	int			oflags;
+	u32			seqid;
+	nfs4_stateid		stateid;
+	int 			status;
+
+	oflags = NFS4_SHARE_ACCESS_READ | O_CREAT | (flags & O_EXCL);
+	status = do_open(dir, name, oflags, sattr, fattr, fhandle, &seqid, stateid);
+	if (!status) {
+		if (flags & O_EXCL)
+			status = do_setattr(NFS_SERVER(dir), fattr, fhandle, sattr, stateid);
+		do_close(NFS_SERVER(dir), fhandle, seqid, stateid);
+	}
+	return status;
+}
+
+static int
+nfs4_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2];
+	int			status;
+
+	dir_attr.valid = 0;
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "remove");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_remove(&compound, name, &dir_cinfo);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	status = nfs4_call_compound(&compound, NULL, 0);
+
+	if (!status) {
+		process_cinfo(&dir_cinfo, &dir_attr);
+		nfs_refresh_inode(dir, &dir_attr);
+	}
+	return status;
+}
+
+struct unlink_desc {
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	struct nfs4_change_info	cinfo;
+	struct nfs_fattr	attrs;
+	u32			bmres[2];	/* GETATTR result bitmap; referenced by the compound after setup returns */
+};
+
+/* Prepare (but do not send) the REMOVE compound; all result buffers live in the kmalloc'd descriptor. */
+static int
+nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
+{
+	struct unlink_desc *	up;
+	struct nfs4_compound *	cp;
+
+	up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL);
+	if (!up)
+		return -ENOMEM;
+	cp = &up->compound;
+	
+	nfs4_setup_compound(cp, up->ops, NFS_SERVER(dir->d_inode), "unlink_setup");
+	nfs4_setup_putfh(cp, NFS_FH(dir->d_inode));
+	nfs4_setup_remove(cp, name, &up->cinfo);
+	nfs4_setup_getattr(cp, &up->attrs, up->bmres);
+	msg->rpc_proc = NFSPROC4_COMPOUND;
+	msg->rpc_argp = cp;
+	msg->rpc_resp = cp;
+	return 0;
+}
+
+static int
+nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	struct rpc_message *msg = &task->tk_msg;
+	struct unlink_desc *up;
+	
+	if (msg->rpc_argp) {
+		up = (struct unlink_desc *) msg->rpc_argp;
+		process_lease(&up->compound);
+		process_cinfo(&up->cinfo, &up->attrs);
+		nfs_refresh_inode(dir->d_inode, &up->attrs);
+		kfree(up);
+		msg->rpc_argp = NULL;
+	}
+	return 0;
+}
+
+/*
+ * Rename old_dir/old_name to new_dir/new_name with one compound:
+ *   PUTFH(old_dir) SAVEFH PUTFH(new_dir) RENAME GETATTR RESTOREFH GETATTR
+ * The first GETATTR describes the target directory, the second one
+ * (after RESTOREFH) the source directory.  On success both directory
+ * inodes are refreshed.  Returns the status of the compound call.
+ */
+static int
+nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		 struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	src_cinfo, dst_cinfo;
+	struct nfs_fattr	src_attr, dst_attr;
+	u32			src_bmres[2], dst_bmres[2];
+	int			err;
+
+	src_attr.valid = 0;
+	dst_attr.valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(old_dir), "rename");
+	nfs4_setup_putfh(&compound, NFS_FH(old_dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_putfh(&compound, NFS_FH(new_dir));
+	nfs4_setup_rename(&compound, old_name, new_name, &src_cinfo, &dst_cinfo);
+	nfs4_setup_getattr(&compound, &dst_attr, dst_bmres);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &src_attr, src_bmres);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&src_cinfo, &src_attr);
+	process_cinfo(&dst_cinfo, &dst_attr);
+	nfs_refresh_inode(old_dir, &src_attr);
+	nfs_refresh_inode(new_dir, &dst_attr);
+	return 0;
+}
+
+/*
+ * Create the hard link dir/name referring to 'inode':
+ *   PUTFH(inode) SAVEFH PUTFH(dir) LINK GETATTR RESTOREFH GETATTR
+ * The first GETATTR returns the directory attributes, the second
+ * those of the linked file.  Both inodes are refreshed on success.
+ * Returns the status of the compound call.
+ */
+static int
+nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	cinfo;
+	struct nfs_fattr	parent_attr, file_attr;
+	u32			parent_bmres[2], file_bmres[2];
+	int			err;
+
+	parent_attr.valid = 0;
+	file_attr.valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "link");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_link(&compound, name, &cinfo);
+	nfs4_setup_getattr(&compound, &parent_attr, parent_bmres);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &file_attr, file_bmres);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&cinfo, &parent_attr);
+	nfs_refresh_inode(dir, &parent_attr);
+	nfs_refresh_inode(inode, &file_attr);
+	return 0;
+}
+
+/*
+ * Create the symbolic link dir/name pointing at 'path':
+ *   PUTFH(dir) SAVEFH CREATE(symlink) GETATTR GETFH RESTOREFH GETATTR
+ * The new object's attributes and file handle are returned through
+ * 'fattr' and 'fhandle'; the directory inode is refreshed on success.
+ * Returns the status of the compound call.
+ */
+static int
+nfs4_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
+		  struct iattr *sattr, struct nfs_fh *fhandle,
+		  struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	cinfo;
+	struct nfs_fattr	parent_attr;
+	u32			parent_bmres[2], new_bmres[2];
+	int			err;
+
+	parent_attr.valid = 0;
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "symlink");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_create_symlink(&compound, name, path, sattr, &cinfo);
+	nfs4_setup_getattr(&compound, fattr, new_bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &parent_attr, parent_bmres);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&cinfo, &parent_attr);
+	nfs_refresh_inode(dir, &parent_attr);
+	return 0;
+}
+
+/*
+ * Create the directory dir/name:
+ *   PUTFH(dir) SAVEFH CREATE(dir) GETATTR GETFH RESTOREFH GETATTR
+ * The new directory's attributes and file handle are returned through
+ * 'fattr' and 'fhandle'; the parent inode is refreshed on success.
+ * Returns the status of the compound call.
+ */
+static int
+nfs4_proc_mkdir(struct inode *dir, struct qstr *name, struct iattr *sattr,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	cinfo;
+	struct nfs_fattr	parent_attr;
+	u32			parent_bmres[2], new_bmres[2];
+	int			err;
+
+	parent_attr.valid = 0;
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mkdir");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_create_dir(&compound, name, sattr, &cinfo);
+	nfs4_setup_getattr(&compound, fattr, new_bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &parent_attr, parent_bmres);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&cinfo, &parent_attr);
+	nfs_refresh_inode(dir, &parent_attr);
+	return 0;
+}
+
+/*
+ * Read directory entries of dentry's inode into 'page', starting at
+ * 'cookie', using a PUTFH + READDIR compound issued with 'cred'.
+ * The whole operation runs under the big kernel lock.  The 'plus'
+ * argument is not used by this implementation.
+ * Returns the status of the compound call.
+ */
+static int
+nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+                  u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	struct inode		*dir_inode = dentry->d_inode;
+	int			err;
+
+	lock_kernel();
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir_inode), "readdir");
+	nfs4_setup_putfh(&compound, NFS_FH(dir_inode));
+	nfs4_setup_readdir(&compound, cookie, NFS_COOKIEVERF(dir_inode),
+			   &page, count, dentry);
+	err = nfs4_call_compound(&compound, cred, 0);
+	unlock_kernel();
+
+	return err;
+}
+
+/*
+ * Create the special file dir/name with device number 'rdev':
+ *   PUTFH(dir) SAVEFH CREATE(special) GETATTR GETFH RESTOREFH GETATTR
+ * The new object's attributes and file handle are returned through
+ * 'fattr' and 'fh'; the directory inode is refreshed on success.
+ * Returns the status of the compound call.
+ */
+static int
+nfs4_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr,
+		dev_t rdev, struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	cinfo;
+	struct nfs_fattr	parent_attr;
+	u32			parent_bmres[2], new_bmres[2];
+	int			err;
+
+	parent_attr.valid = 0;
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mknod");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_create_special(&compound, name, rdev, sattr, &cinfo);
+	nfs4_setup_getattr(&compound, fattr, new_bmres);
+	nfs4_setup_getfh(&compound, fh);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &parent_attr, parent_bmres);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&cinfo, &parent_attr);
+	nfs_refresh_inode(dir, &parent_attr);
+	return 0;
+}
+
+/*
+ * Fetch filesystem statistics for the filesystem containing 'fhandle'.
+ * *fsstat is cleared first and then filled in by a PUTFH + statfs
+ * compound.  Returns the status of the compound call.
+ */
+static int
+nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsstat *fsstat)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	u32			bmres[2];
+	int			err;
+
+	memset(fsstat, 0, sizeof(*fsstat));
+
+	nfs4_setup_compound(&compound, ops, server, "statfs");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_statfs(&compound, fsstat, bmres);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	return err;
+}
+
+/*
+ * Fetch filesystem information for the filesystem containing
+ * 'fhandle'.  *fsinfo is cleared first and then filled in by a
+ * PUTFH + fsinfo compound.  Returns the status of the compound call.
+ *
+ * The compound is tagged "fsinfo" so that it can be told apart from
+ * a statfs request when the tag is inspected.
+ */
+static int
+nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsinfo *fsinfo)
+{
+	struct nfs4_compound compound;
+	struct nfs4_op ops[2];
+	u32 bmres[2];
+
+	memset(fsinfo, 0, sizeof(*fsinfo));
+	nfs4_setup_compound(&compound, ops, server, "fsinfo");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_fsinfo(&compound, fsinfo, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+/*
+ * Fetch pathconf information for the object named by 'fhandle'.
+ * *pathconf is cleared first and then filled in by a PUTFH + pathconf
+ * compound.  Returns the status of the compound call.
+ *
+ * The compound is tagged "pathconf" so that it can be told apart
+ * from a statfs request when the tag is inspected.
+ */
+static int
+nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_pathconf *pathconf)
+{
+	struct nfs4_compound compound;
+	struct nfs4_op ops[2];
+	u32 bmres[2];
+
+	memset(pathconf, 0, sizeof(*pathconf));
+	nfs4_setup_compound(&compound, ops, server, "pathconf");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_pathconf(&compound, pathconf, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+/*
+ * RPC completion callback for an asynchronous read: process the lease
+ * for the compound, then pass the byte count and EOF flag on to the
+ * generic read completion code.
+ */
+static void
+nfs4_read_done(struct rpc_task *task)
+{
+	struct nfs_read_data *rdata;
+
+	rdata = (struct nfs_read_data *) task->tk_calldata;
+	process_lease(&rdata->u.v4.compound);
+	nfs_readpage_result(task, rdata->u.v4.res_count, rdata->u.v4.res_eof);
+}
+
+/*
+ * Build the PUTFH + READ compound for an asynchronous read of 'count'
+ * bytes and initialise data->task to run it.  The file offset and the
+ * page offset are taken from the first request on data->pages.
+ * nfs4_read_done() is installed as the completion callback and
+ * nfs_readdata_release() as the release callback.
+ */
+static void
+nfs4_proc_read_setup(struct nfs_read_data *data, unsigned int count)
+{
+	struct nfs4_compound *cp = &data->u.v4.compound;
+	struct rpc_task	*task = &data->task;
+	struct inode *inode = data->inode;
+	struct nfs_page *first = nfs_list_entry(data->pages.next);
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = data->cred,
+	};
+	int flags;
+
+	nfs4_setup_compound(cp, data->u.v4.ops, NFS_SERVER(inode), "read [async]");
+	nfs4_setup_putfh(cp, NFS_FH(inode));
+	nfs4_setup_read(cp, req_offset(first) + first->wb_offset,
+			count, data->pagevec, first->wb_offset,
+			&data->u.v4.res_eof,
+			&data->u.v4.res_count);
+
+	/* N.B. Do we need to test? Never called for swapfile inode */
+	flags = RPC_TASK_ASYNC;
+	if (IS_SWAPFILE(inode))
+		flags |= NFS_RPC_SWAPFLAGS;
+
+	/* Initialise the task, attach our data and the release hook,
+	 * then bind the message to it. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags);
+	task->tk_calldata = data;
+	task->tk_release = nfs_readdata_release;
+
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * RPC completion callback for a write: process the lease for the
+ * compound, then hand the requested stability level, the requested
+ * count and the count actually written to the generic writeback code.
+ */
+static void
+nfs4_write_done(struct rpc_task *task)
+{
+	struct nfs_write_data *wdata;
+
+	wdata = (struct nfs_write_data *) task->tk_calldata;
+	process_lease(&wdata->u.v4.compound);
+	nfs_writeback_done(task, wdata->u.v4.arg_stable,
+			   wdata->u.v4.arg_count, wdata->u.v4.res_count);
+}
+
+/*
+ * Build the PUTFH + WRITE compound for writing 'count' bytes and
+ * initialise data->task to run it.
+ *
+ * Stability: without FLUSH_STABLE the write is NFS_UNSTABLE; with it,
+ * NFS_FILE_SYNC is used when the inode has no pending commits
+ * (ncommit == 0) and NFS_DATA_SYNC otherwise.  The task is
+ * asynchronous unless FLUSH_SYNC is set in 'how'.
+ */
+static void
+nfs4_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how)
+{
+	struct nfs4_compound *cp = &data->u.v4.compound;
+	struct rpc_task	*task = &data->task;
+	struct inode *inode = data->inode;
+	struct nfs_page *first = nfs_list_entry(data->pages.next);
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = data->cred,
+	};
+	int stable = NFS_UNSTABLE;
+	int flags = 0;
+
+	if (how & FLUSH_STABLE)
+		stable = NFS_I(inode)->ncommit ? NFS_DATA_SYNC : NFS_FILE_SYNC;
+
+	nfs4_setup_compound(cp, data->u.v4.ops, NFS_SERVER(inode), "write [async]");
+	nfs4_setup_putfh(cp, NFS_FH(inode));
+	nfs4_setup_write(cp, req_offset(first) + first->wb_offset,
+			 count, stable, data->pagevec, first->wb_offset,
+			 &data->u.v4.res_count, &data->verf);
+
+	/* Run asynchronously unless the caller asked for a sync flush. */
+	if (!(how & FLUSH_SYNC))
+		flags = RPC_TASK_ASYNC;
+
+	/* Initialise the task, attach our data and the release hook,
+	 * then bind the message to it. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags);
+	task->tk_calldata = data;
+	task->tk_release = nfs_writedata_release;
+
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * RPC completion callback for a commit: process the lease for the
+ * compound, then run the generic commit completion code.
+ */
+static void
+nfs4_commit_done(struct rpc_task *task)
+{
+	struct nfs_write_data *wdata;
+
+	wdata = (struct nfs_write_data *) task->tk_calldata;
+	process_lease(&wdata->u.v4.compound);
+	nfs_commit_done(task);
+}
+
+/*
+ * Build the PUTFH + COMMIT compound covering 'len' bytes from offset
+ * 'start' and initialise data->task to run it.  The task is
+ * asynchronous unless FLUSH_SYNC is set in 'how'.
+ */
+static void
+nfs4_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how)
+{
+	struct nfs4_compound *cp = &data->u.v4.compound;
+	struct rpc_task	*task = &data->task;
+	struct inode *inode = data->inode;
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = data->cred,
+	};
+	int flags = 0;
+
+	nfs4_setup_compound(cp, data->u.v4.ops, NFS_SERVER(inode), "commit [async]");
+	nfs4_setup_putfh(cp, NFS_FH(inode));
+	nfs4_setup_commit(cp, start, len, &data->verf);
+
+	/* Run asynchronously unless the caller asked for a sync flush. */
+	if (!(how & FLUSH_SYNC))
+		flags = RPC_TASK_ASYNC;
+
+	/* Initialise the task, attach our data and the release hook,
+	 * then bind the message to it. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags);
+	task->tk_calldata = data;
+	task->tk_release = nfs_writedata_release;
+
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * nfs4_proc_renew(): This is not one of the nfs_rpc_ops; it is a special
+ * standalone procedure for queueing an asynchronous RENEW.
+ */
+/*
+ * One heap-allocated unit holding the RPC task for an asynchronous
+ * RENEW together with the compound it sends.  nfs4_proc_renew()
+ * allocates it and stores it in task.tk_calldata; renew_release()
+ * frees it through that pointer.
+ */
+struct renew_desc {
+	struct rpc_task		task;		/* the async task itself */
+	struct nfs4_compound	compound;	/* argument and result of the call */
+	struct nfs4_op		ops[1];		/* storage for the single RENEW op */
+};
+
+/*
+ * Completion callback of the asynchronous RENEW: process the lease
+ * for the compound that was passed as the call's argument.
+ */
+static void
+renew_done(struct rpc_task *task)
+{
+	process_lease((struct nfs4_compound *) task->tk_msg.rpc_argp);
+}
+
+/*
+ * Release callback of the asynchronous RENEW: free whatever
+ * tk_calldata points at and clear the pointer.
+ */
+static void
+renew_release(struct rpc_task *task)
+{
+	void *desc = task->tk_calldata;
+
+	kfree(desc);
+	task->tk_calldata = NULL;
+}
+
+/*
+ * Queue an asynchronous RENEW to 'server'.
+ *
+ * A renew_desc holding the task and its compound is allocated here
+ * and handed to the task as tk_calldata; renew_release() frees it.
+ * Returns -ENOMEM if the allocation fails, otherwise the value
+ * returned by rpc_execute().
+ */
+int
+nfs4_proc_renew(struct nfs_server *server)
+{
+	struct renew_desc *desc;
+	struct rpc_message msg;
+
+	desc = (struct renew_desc *) kmalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	nfs4_setup_compound(&desc->compound, desc->ops, server, "renew");
+	nfs4_setup_renew(&desc->compound);
+
+	msg.rpc_proc = NFSPROC4_COMPOUND;
+	msg.rpc_argp = &desc->compound;
+	msg.rpc_resp = &desc->compound;
+	msg.rpc_cred = NULL;
+
+	rpc_init_task(&desc->task, server->client, renew_done, RPC_TASK_ASYNC);
+	rpc_call_setup(&desc->task, &msg, 0);
+	desc->task.tk_calldata = desc;
+	desc->task.tk_release = renew_release;
+
+	return rpc_execute(&desc->task);
+}
+
+/*
+ * Operations table for NFS protocol version 4.  Each member points
+ * at the nfs4_proc_* routine implementing that operation.
+ */
+struct nfs_rpc_ops	nfs_v4_clientops = {
+	.version	= 4,			/* protocol version */
+	.getroot	= nfs4_proc_get_root,
+	.getattr	= nfs4_proc_getattr,
+	.setattr	= nfs4_proc_setattr,
+	.lookup		= nfs4_proc_lookup,
+	.access		= nfs4_proc_access,
+	.readlink	= nfs4_proc_readlink,
+	.read		= nfs4_proc_read,
+	.write		= nfs4_proc_write,
+	.commit		= NULL,			/* none provided; see .commit_setup */
+	.create		= nfs4_proc_create,
+	.remove		= nfs4_proc_remove,
+	.unlink_setup	= nfs4_proc_unlink_setup,
+	.unlink_done	= nfs4_proc_unlink_done,
+	.rename		= nfs4_proc_rename,
+	.link		= nfs4_proc_link,
+	.symlink	= nfs4_proc_symlink,
+	.mkdir		= nfs4_proc_mkdir,
+	.rmdir		= nfs4_proc_remove,	/* same routine as .remove */
+	.readdir	= nfs4_proc_readdir,
+	.mknod		= nfs4_proc_mknod,
+	.statfs		= nfs4_proc_statfs,
+	.fsinfo		= nfs4_proc_fsinfo,
+	.pathconf	= nfs4_proc_pathconf,
+	.decode_dirent	= nfs4_decode_dirent,
+	.read_setup	= nfs4_proc_read_setup,
+	.write_setup	= nfs4_proc_write_setup,
+	.commit_setup	= nfs4_proc_commit_setup,
+};
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
new file mode 100644
index 000000000000..4ba871885dbc
--- /dev/null
+++ b/fs/nfs/nfs4renewd.c
@@ -0,0 +1,110 @@
+/*
+ *  fs/nfs/nfs4renewd.c
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 "renew daemon", which wakes up periodically to
+ * send a RENEW, to keep state alive on the server.  The daemon is implemented
+ * as an rpc_task, not a real kernel thread, so it always runs in rpciod's
+ * context.  There is one renewd per nfs_server.
+ *
+ * TODO: If the send queue gets backlogged (e.g., if the server goes down),
+ * we will keep filling the queue with periodic RENEW requests.  We need a
+ * mechanism for ensuring that if renewd successfully sends off a request,
+ * then it only wakes up when the request is finished.  Maybe use the
+ * child task framework of the RPC layer?
+ */
+
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+
+static RPC_WAITQ(nfs4_renewd_queue, "nfs4_renewd_queue");
+
+/*
+ * Body of the renew daemon task.  Each time it runs it picks the next
+ * sleep interval and puts itself back to sleep on nfs4_renewd_queue
+ * with itself as the next action:
+ *
+ *  - no NFSv4 state on this server: just sleep for 2/3 of the lease;
+ *  - last renewal less than lease/3 ago: sleep until 2/3 of the lease
+ *    has passed since that renewal;
+ *  - otherwise: queue an asynchronous RENEW and sleep for 2/3 of the
+ *    lease.
+ *
+ * The interval is never shorter than 5 seconds.
+ *
+ * jiffies is sampled once, and the "too early" test is done on the
+ * signed difference so that it stays correct when the jiffies counter
+ * wraps around.
+ */
+static void
+renewd(struct rpc_task *task)
+{
+	struct nfs_server *server = (struct nfs_server *)task->tk_calldata;
+	unsigned long lease = server->lease_time;
+	unsigned long last = server->last_renewal;
+	unsigned long now = jiffies;
+	unsigned long timeout;
+
+	if (!server->nfs4_state)
+		timeout = (2 * lease) / 3;
+	else if ((long)(now - (last + lease/3)) < 0)
+		timeout = (2 * lease) / 3 + last - now;
+	else {
+		/* Queue an asynchronous RENEW. */
+		nfs4_proc_renew(server);
+		timeout = (2 * lease) / 3;
+	}
+
+	if (timeout < 5 * HZ)    /* safeguard */
+		timeout = 5 * HZ;
+	task->tk_timeout = timeout;
+	task->tk_action = renewd;
+	task->tk_exit = NULL;
+	rpc_sleep_on(&nfs4_renewd_queue, task, NULL, NULL);
+	return;
+}
+
+/*
+ * Start the renew daemon for 'server': create an asynchronous RPC
+ * task whose action is renewd() and whose calldata is the server,
+ * then execute it.  Runs under the big kernel lock.
+ * Returns -ENOMEM if no task could be allocated, otherwise the value
+ * returned by rpc_execute().
+ */
+int
+nfs4_init_renewd(struct nfs_server *server)
+{
+	struct rpc_task *renew_task;
+	int err;
+
+	lock_kernel();
+	renew_task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC);
+	if (renew_task) {
+		renew_task->tk_calldata = server;
+		renew_task->tk_action = renewd;
+		err = rpc_execute(renew_task);
+	} else {
+		err = -ENOMEM;
+	}
+	unlock_kernel();
+
+	return err;
+}
+
+/*
+ * Local variables:
+ *   c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
new file mode 100644
index 000000000000..ecbc54fb1048
--- /dev/null
+++ b/fs/nfs/nfs4state.c
@@ -0,0 +1,81 @@
+/*
+ *  fs/nfs/nfs4state.c
+ *
+ *  Client-side state management for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 state model.  For the time being,
+ * this is minimal, but will be made much more complex in a
+ * subsequent patch.
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/nfs_fs.h>
+
+/*
+ * nfs4_get_client(): returns an empty client structure
+ * nfs4_put_client(): drops reference to client structure
+ *
+ * Since these are allocated/deallocated very rarely, we don't
+ * bother putting them in a slab cache...
+ */
+/*
+ * Allocate a client structure with a reference count of one, a zero
+ * clientid and an empty lock-owner list.  Returns NULL if the
+ * allocation fails.
+ */
+struct nfs4_client *
+nfs4_get_client(void)
+{
+        struct nfs4_client *client;
+
+        client = kmalloc(sizeof(*client), GFP_KERNEL);
+        if (!client)
+                return NULL;
+
+        atomic_set(&client->cl_count, 1);
+        client->cl_clientid = 0;
+        INIT_LIST_HEAD(&client->cl_lockowners);
+        return client;
+}
+
+/*
+ * Drop one reference to 'clp'.  When the last reference goes away the
+ * structure is freed; at that point its lock-owner list must be empty.
+ * Passing NULL or a structure whose count is already zero is a bug.
+ */
+void
+nfs4_put_client(struct nfs4_client *clp)
+{
+        BUG_ON(!clp);
+        BUG_ON(!atomic_read(&clp->cl_count));
+
+        if (!atomic_dec_and_test(&clp->cl_count))
+                return;
+
+        /* That was the last reference. */
+        BUG_ON(!list_empty(&clp->cl_lockowners));
+        kfree(clp);
+}
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
new file mode 100644
index 000000000000..edbf0e2a02d7
--- /dev/null
+++ b/fs/nfs/nfs4xdr.c
@@ -0,0 +1,1777 @@
+/*
+ *  fs/nfs/nfs4xdr.c
+ *
+ *  Client-side XDR for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/kdev_t.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+
+/* Empirically, it seems that the NFS client gets confused if
+ * cookies larger than this are returned -- presumably a
+ * signedness issue?
+ */
+#define COOKIE_MAX		0x7fffffff
+
+/* Clientid held in the NFSv4 state attached to an nfs_server. */
+#define NFS4_CLIENTID(server)	((server)->nfs4_state->cl_clientid)
+
+#define NFSDBG_FACILITY		NFSDBG_XDR
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO		EIO
+
+extern int			nfs_stat_to_errno(int);
+
+/* Buffer size constants for the void and COMPOUND procedures. */
+#define NFS4_enc_void_sz	0
+#define NFS4_dec_void_sz	0
+#define NFS4_enc_compound_sz	1024  /* XXX: large enough? */
+#define NFS4_dec_compound_sz	1024  /* XXX: large enough? */
+
+/*
+ * Table pairing a file-mode format value (S_IF*) with an NF* file
+ * type constant.  NOTE(review): presumably indexed by the on-the-wire
+ * NFSv4 file type, with the trailing NFNON entries covering types
+ * that have no local equivalent -- confirm against the decode code.
+ */
+static struct {
+	unsigned int	mode;
+	unsigned int	nfs2type;
+} nfs_type2fmt[] = {
+	{ 0,		NFNON	     },
+	{ S_IFREG,	NFREG	     },
+	{ S_IFDIR,	NFDIR	     },
+	{ S_IFBLK,	NFBLK	     },
+	{ S_IFCHR,	NFCHR	     },
+	{ S_IFLNK,	NFLNK	     },
+	{ S_IFSOCK,	NFSOCK	     },
+	{ S_IFIFO,	NFFIFO	     },
+	{ 0,		NFNON	     },
+	{ 0,		NFNON	     },
+};
+
+/*
+ * START OF "GENERIC" ENCODE ROUTINES.
+ *   These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+/* Declares the local write cursor 'p' used by the macros below. */
+#define ENCODE_HEAD						\
+	u32 *p;
+/* Common successful exit of an encode routine. */
+#define ENCODE_TAIL						\
+	return 0
+
+/* Store a 32-bit value in network byte order and advance 'p'. */
+#define WRITE32(n)               *p++ = htonl(n)
+/* Store a 64-bit value as two 32-bit words, high word first. */
+#define WRITE64(n)               do {				\
+	*p++ = htonl((u32)((n) >> 32));				\
+	*p++ = htonl((u32)(n));					\
+} while (0)
+/* Copy nbytes of opaque data via xdr_writemem() and advance 'p'. */
+#define WRITEMEM(ptr,nbytes)     do {				\
+	p = xdr_writemem(p, ptr, nbytes);			\
+} while (0)
+
+/*
+ * RESERVE_SPACE: BUG if nbytes (rounded up to whole words) would run
+ * past cp->end, then load 'p' from the compound's cursor.
+ * ADJUST_ARGS: store 'p' back into the compound's cursor.
+ * Both expect a local 'cp' pointing at the compound being encoded.
+ */
+#define RESERVE_SPACE(nbytes)	do { BUG_ON(cp->p + XDR_QUADLEN(nbytes) > cp->end); p = cp->p; } while (0)
+#define ADJUST_ARGS()           cp->p = p
+
+/*
+ * Copy nbytes of opaque data to p and return the position just past
+ * it, rounded up to a whole number of 32-bit words.  The last word is
+ * cleared before the copy so that any padding bytes are zero.  With
+ * nbytes == 0 nothing is written and p is returned unchanged.
+ */
+static inline
+u32 *xdr_writemem(u32 *p, const void *ptr, int nbytes)
+{
+	int nwords = XDR_QUADLEN(nbytes);
+
+	if (nwords != 0) {
+		p[nwords - 1] = 0;
+		memcpy(p, ptr, nbytes);
+		p += nwords;
+	}
+	return p;
+}
+
+/*
+ * FIXME: The following dummy entries will be replaced once the userland
+ * upcall gets in...
+ */
+/*
+ * Placeholder uid-to-name mapping: ignores 'uid', writes the string
+ * "nobody" (with its terminating NUL) to p and returns its length.
+ */
+static int
+encode_uid(char *p, uid_t uid)
+{
+	static const char placeholder[] = "nobody";
+
+	strcpy(p, placeholder);
+	return sizeof(placeholder) - 1;
+}
+
+/*
+ * FIXME: The following dummy entries will be replaced once the userland
+ * upcall gets in...
+ */
+/*
+ * Placeholder gid-to-name mapping: ignores 'gid', writes the string
+ * "nobody" (with its terminating NUL) to p and returns its length.
+ */
+static int
+encode_gid(char *p, gid_t gid)
+{
+	static const char placeholder[] = "nobody";
+
+	strcpy(p, placeholder);
+	return sizeof(placeholder) - 1;
+}
+
+/*
+ * Encode the attributes selected by iap->ia_valid as a two-word
+ * attribute bitmap followed by a length-prefixed attribute buffer.
+ *
+ * Layout produced:
+ *   bitmap word count (2), bmval0, bmval1, attribute buffer length,
+ *   then, in this order and only if selected: size, mode, owner,
+ *   owner_group, time_access_set, time_modify_set.
+ *
+ * The bitmap words and the buffer length are only known once all
+ * attributes have been written, so room is left for them and they
+ * are backfilled at the end.
+ *
+ * Returns 0 on success, or the negative value returned by
+ * encode_uid()/encode_gid() if an id cannot be mapped to a name.
+ */
+static int
+encode_attrs(struct nfs4_compound *cp, struct iattr *iap)
+{
+	char owner_name[256];
+	char owner_group[256];
+	int owner_namelen = 0;
+	int owner_grouplen = 0;
+	u32 *q;
+	int len;
+	u32 bmval0 = 0;
+	u32 bmval1 = 0;
+	int status;
+	ENCODE_HEAD;
+
+	/*
+	 * We reserve enough space to write the entire attribute buffer at once.
+	 * In the worst case the fixed-size part is
+	 *   16(bitmap count + bitmap + attrlen) + 8(size) + 4(mode)
+	 *      + 16(atime) + 16(mtime) = 60 bytes,
+	 * plus any contribution from variable-length fields such as
+	 * owner/group/acl's.  A time set to the client's time takes four
+	 * words: how, seconds (two words), nanoseconds.
+	 */
+	len = 60;
+
+	/* Sigh */
+	if (iap->ia_valid & ATTR_UID) {
+		status = owner_namelen = encode_uid(owner_name, iap->ia_uid);
+		if (status < 0) {
+			printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
+			       iap->ia_uid);
+			goto out;
+		}
+		/* length word + name padded to a whole number of words */
+		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
+	}
+	if (iap->ia_valid & ATTR_GID) {
+		status = owner_grouplen = encode_gid(owner_group, iap->ia_gid);
+		if (status < 0) {
+			printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
+			       iap->ia_gid);
+			goto out;
+		}
+		/* length word + name padded to a whole number of words */
+		len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
+	}
+	RESERVE_SPACE(len);
+
+	/*
+	 * We write the bitmap length now, but leave the bitmap and the attribute
+	 * buffer length to be backfilled at the end of this routine.
+	 */
+	WRITE32(2);
+	q = p;
+	p += 3;
+
+	if (iap->ia_valid & ATTR_SIZE) {
+		bmval0 |= FATTR4_WORD0_SIZE;
+		WRITE64(iap->ia_size);
+	}
+	if (iap->ia_valid & ATTR_MODE) {
+		bmval1 |= FATTR4_WORD1_MODE;
+		WRITE32(iap->ia_mode);
+	}
+	if (iap->ia_valid & ATTR_UID) {
+		bmval1 |= FATTR4_WORD1_OWNER;
+		WRITE32(owner_namelen);
+		/* WRITEMEM already advances p past the padded string. */
+		WRITEMEM(owner_name, owner_namelen);
+	}
+	if (iap->ia_valid & ATTR_GID) {
+		bmval1 |= FATTR4_WORD1_OWNER_GROUP;
+		WRITE32(owner_grouplen);
+		/* WRITEMEM already advances p past the padded string. */
+		WRITEMEM(owner_group, owner_grouplen);
+	}
+	if (iap->ia_valid & ATTR_ATIME_SET) {
+		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
+		WRITE32(NFS4_SET_TO_CLIENT_TIME);
+		WRITE32(0);
+		WRITE32(iap->ia_atime);
+		WRITE32(0);
+	}
+	else if (iap->ia_valid & ATTR_ATIME) {
+		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
+		WRITE32(NFS4_SET_TO_SERVER_TIME);
+	}
+	if (iap->ia_valid & ATTR_MTIME_SET) {
+		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
+		WRITE32(NFS4_SET_TO_CLIENT_TIME);
+		WRITE32(0);
+		WRITE32(iap->ia_mtime);
+		WRITE32(0);
+	}
+	else if (iap->ia_valid & ATTR_MTIME) {
+		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
+		WRITE32(NFS4_SET_TO_SERVER_TIME);
+	}
+
+	ADJUST_ARGS();
+
+	/*
+	 * Now we backfill the bitmap and the attribute buffer length.
+	 * The 12 bytes subtracted are the two bitmap words and the
+	 * length word itself, which sit between q and the first attribute.
+	 */
+	len = (char *)p - (char *)q - 12;
+	*q++ = htonl(bmval0);
+	*q++ = htonl(bmval1);
+	*q++ = htonl(len);
+
+	status = 0;
+out:
+	return status;
+}
+
+/*
+ * Encode an ACCESS operation: opcode followed by the requested
+ * access mask (8 bytes).  Always returns 0.
+ */
+static int
+encode_access(struct nfs4_compound *cp, struct nfs4_access *access)
+{
+	u32 *p;
+
+	RESERVE_SPACE(8);
+	WRITE32(OP_ACCESS);
+	WRITE32(access->ac_req_access);
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/*
+ * Encode a CLOSE operation: opcode, seqid, then the stateid.
+ * Always returns 0.
+ *
+ * The reservation is computed from sizeof(nfs4_stateid) so that the
+ * bounds check in RESERVE_SPACE covers everything written here:
+ * 4 (opcode) + 4 (seqid) + the stateid itself.
+ */
+static int
+encode_close(struct nfs4_compound *cp, struct nfs4_close *close)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(8 + sizeof(nfs4_stateid));
+	WRITE32(OP_CLOSE);
+	WRITE32(close->cl_seqid);
+	WRITEMEM(close->cl_stateid, sizeof(nfs4_stateid));
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+/*
+ * Encode a COMMIT operation: opcode, 64-bit start offset and 32-bit
+ * length (16 bytes).  Always returns 0.
+ */
+static int
+encode_commit(struct nfs4_compound *cp, struct nfs4_commit *commit)
+{
+	u32 *p;
+
+	RESERVE_SPACE(16);
+	WRITE32(OP_COMMIT);
+	WRITE64(commit->co_start);
+	WRITE32(commit->co_len);
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/*
+ * Encode a CREATE operation: opcode and file type, then the
+ * type-specific data (link text for NF4LNK, the two specdata words
+ * for NF4BLK/NF4CHR, nothing for other types), then the name, and
+ * finally the initial attributes.  Returns whatever encode_attrs()
+ * returns.
+ */
+static int
+encode_create(struct nfs4_compound *cp, struct nfs4_create *create)
+{
+	u32 *p;
+
+	RESERVE_SPACE(8);
+	WRITE32(OP_CREATE);
+	WRITE32(create->cr_ftype);
+	ADJUST_ARGS();
+
+	if (create->cr_ftype == NF4LNK) {
+		/* symlink: length-prefixed target path */
+		RESERVE_SPACE(4 + create->cr_textlen);
+		WRITE32(create->cr_textlen);
+		WRITEMEM(create->cr_text, create->cr_textlen);
+		ADJUST_ARGS();
+	} else if (create->cr_ftype == NF4BLK || create->cr_ftype == NF4CHR) {
+		/* device node: two specdata words */
+		RESERVE_SPACE(8);
+		WRITE32(create->cr_specdata1);
+		WRITE32(create->cr_specdata2);
+		ADJUST_ARGS();
+	}
+
+	/* length-prefixed name of the new object */
+	RESERVE_SPACE(4 + create->cr_namelen);
+	WRITE32(create->cr_namelen);
+	WRITEMEM(create->cr_name, create->cr_namelen);
+	ADJUST_ARGS();
+
+	return encode_attrs(cp, create->cr_attrs);
+}
+
+/*
+ * Encode a GETATTR operation: opcode, bitmap word count (2) and the
+ * two request bitmap words (16 bytes).  Always returns 0.
+ */
+static int
+encode_getattr(struct nfs4_compound *cp, struct nfs4_getattr *getattr)
+{
+	u32 *p;
+
+	RESERVE_SPACE(16);
+	WRITE32(OP_GETATTR);
+	WRITE32(2);
+	WRITE32(getattr->gt_bmval[0]);
+	WRITE32(getattr->gt_bmval[1]);
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/*
+ * Encode a GETFH operation, which consists of the opcode only.
+ * Always returns 0.
+ */
+static int
+encode_getfh(struct nfs4_compound *cp)
+{
+	u32 *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_GETFH);
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/*
+ * Encode a LINK operation: opcode followed by the length-prefixed
+ * name of the new link.  Always returns 0.
+ */
+static int
+encode_link(struct nfs4_compound *cp, struct nfs4_link *link)
+{
+	u32 *p;
+
+	RESERVE_SPACE(8 + link->ln_namelen);
+	WRITE32(OP_LINK);
+	WRITE32(link->ln_namelen);
+	WRITEMEM(link->ln_name, link->ln_namelen);
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/*
+ * Encode a LOOKUP operation: opcode followed by the length-prefixed
+ * component name.  Always returns 0.
+ */
+static int
+encode_lookup(struct nfs4_compound *cp, struct nfs4_lookup *lookup)
+{
+	int namelen = lookup->lo_name->len;
+	u32 *p;
+
+	RESERVE_SPACE(8 + namelen);
+	WRITE32(OP_LOOKUP);
+	WRITE32(namelen);
+	WRITEMEM(lookup->lo_name->name, namelen);
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/*
+ * Encode an OPEN operation.
+ *
+ * Fixed part: opcode, seqid (0), share_access, share_deny (0),
+ * clientid, a 4-byte owner taken from a function-local counter, and
+ * the open type.  For NFS4_OPEN_CREATE the create mode follows, with
+ * either the verifier (exclusive create), the encoded attributes
+ * (if op_attrs is set), or two zero words.  The operation ends with
+ * a CLAIM_NULL claim carrying the length-prefixed file name.
+ *
+ * Returns 0, or the error from encode_attrs().
+ */
+static int
+encode_open(struct nfs4_compound *cp, struct nfs4_open *open)
+{
+	static int global_id = 0;
+	int owner_id = global_id++;
+	int err;
+	u32 *p;
+
+	/* seqid, share_access, share_deny, clientid, ownerlen, owner, opentype */
+	RESERVE_SPACE(52);
+	WRITE32(OP_OPEN);
+	WRITE32(0);                       /* seqid */
+	WRITE32(open->op_share_access);
+	WRITE32(0);                       /* for us, share_deny== 0 always */
+	WRITE64(NFS4_CLIENTID(cp->server));
+	WRITE32(4);
+	WRITE32(owner_id);
+	WRITE32(open->op_opentype);
+	ADJUST_ARGS();
+
+	if (open->op_opentype == NFS4_OPEN_CREATE) {
+		if (open->op_createmode == NFS4_CREATE_EXCLUSIVE) {
+			/* create mode + verifier */
+			RESERVE_SPACE(12);
+			WRITE32(open->op_createmode);
+			WRITEMEM(open->op_verifier, sizeof(nfs4_verifier));
+			ADJUST_ARGS();
+		} else if (open->op_attrs) {
+			/* create mode + caller-supplied attributes */
+			RESERVE_SPACE(4);
+			WRITE32(open->op_createmode);
+			ADJUST_ARGS();
+			err = encode_attrs(cp, open->op_attrs);
+			if (err)
+				return err;
+		} else {
+			/* create mode + two zero words */
+			RESERVE_SPACE(12);
+			WRITE32(open->op_createmode);
+			WRITE32(0);
+			WRITE32(0);
+			ADJUST_ARGS();
+		}
+	}
+
+	RESERVE_SPACE(8 + open->op_name->len);
+	WRITE32(NFS4_OPEN_CLAIM_NULL);
+	WRITE32(open->op_name->len);
+	WRITEMEM(open->op_name->name, open->op_name->len);
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/*
+ * Encode an OPEN_CONFIRM operation: opcode, stateid, seqid.
+ * Always returns 0.
+ */
+static int
+encode_open_confirm(struct nfs4_compound *cp, struct nfs4_open_confirm *open_confirm)
+{
+	u32 *p;
+
+	RESERVE_SPACE(24);
+	WRITE32(OP_OPEN_CONFIRM);
+	WRITEMEM(open_confirm->oc_stateid, sizeof(nfs4_stateid));
+	/*
+	 * Note: In this "stateless" implementation, the OPEN_CONFIRM
+	 * seqid is always equal to 1.
+	 */
+	WRITE32(1);
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/*
+ * Encode a PUTFH operation: opcode followed by the length-prefixed
+ * file handle.  Always returns 0.
+ */
+static int
+encode_putfh(struct nfs4_compound *cp, struct nfs4_putfh *putfh)
+{
+	int fhlen = putfh->pf_fhandle->size;
+	u32 *p;
+
+	RESERVE_SPACE(8 + fhlen);
+	WRITE32(OP_PUTFH);
+	WRITE32(fhlen);
+	WRITEMEM(putfh->pf_fhandle->data, fhlen);
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/*
+ * Encode a PUTROOTFH operation, which consists of the opcode only.
+ * Always returns 0.
+ */
+static int
+encode_putrootfh(struct nfs4_compound *cp)
+{
+	u32 *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_PUTROOTFH);
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/*
+ * Encode a READ operation (opcode, all-zero stateid, 64-bit offset,
+ * 32-bit length) and set up the request's send length and receive
+ * buffer for the reply.  Always returns 0.
+ */
+static int
+encode_read(struct nfs4_compound *cp, struct nfs4_read *read, struct rpc_rqst *req)
+{
+	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	int		hdrlen;
+	u32		*p;
+
+	RESERVE_SPACE(32);
+	WRITE32(OP_READ);
+	/* all-zero stateid! */
+	WRITE32(0);
+	WRITE32(0);
+	WRITE32(0);
+	WRITE32(0);
+	WRITE64(read->rd_offset);
+	WRITE32(read->rd_length);
+	ADJUST_ARGS();
+
+	/* set up reply iovec
+	 *    toplevel status + taglen + rescount + OP_PUTFH + status
+	 *       + OP_READ + status + eof + datalen = 9
+	 * words, on top of the RPC reply header, the auth slack and the
+	 * tag itself; the sum is converted from words to bytes.
+	 */
+	hdrlen = (RPC_REPHDRSIZE + auth->au_rslack + 9 + XDR_QUADLEN(cp->taglen)) << 2;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	xdr_inline_pages(&req->rq_rcv_buf, hdrlen,
+			 read->rd_pages, read->rd_pgbase, read->rd_length);
+
+	return 0;
+}
+
+/*
+ * Append a READDIR operation (opcode, cookie, cookie verifier, dircount,
+ * maxcount and a two-word attribute bitmap) and set up the request's
+ * receive buffer so that the directory entries land in readdir->rd_pages.
+ *
+ * The reply header length is computed on the assumption spelled out in
+ * the comment below, i.e. that the compound is PUTFH followed by READDIR.
+ */
+static int
+encode_readdir(struct nfs4_compound *cp, struct nfs4_readdir *readdir, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	int replen;
+	ENCODE_HEAD;
+
+	/* 40 = opcode (4) + cookie (8) + verifier + dircount (4) + maxcount (4)
+	 *      + bitmap length (4) + two bitmap words (8); presumably the
+	 *      verifier is 8 bytes */
+	RESERVE_SPACE(40);
+	WRITE32(OP_READDIR);
+	WRITE64(readdir->rd_cookie);
+	WRITEMEM(readdir->rd_req_verifier, sizeof(nfs4_verifier));
+	WRITE32(readdir->rd_count >> 5);  /* meaningless "dircount" field */
+	WRITE32(readdir->rd_count);
+	WRITE32(2);	/* number of bitmap words that follow */
+	WRITE32(readdir->rd_bmval[0]);
+	WRITE32(readdir->rd_bmval[1]);
+	ADJUST_ARGS();
+
+	/* set up reply iovec
+	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
+	 *      + OP_READDIR + status + verifier(2)  = 9
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 9 + XDR_QUADLEN(cp->taglen)) << 2;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->rd_pages,
+			 readdir->rd_pgbase, readdir->rd_count);
+
+	ENCODE_TAIL;
+}
+
+/*
+ * Append a READLINK operation (opcode only) and set up the request's
+ * receive buffer so that the link text is placed in readlink->rl_pages,
+ * starting at page offset 0 and limited to readlink->rl_count bytes.
+ *
+ * The reply header length is computed on the assumption spelled out in
+ * the comment below, i.e. that the compound is PUTFH followed by READLINK.
+ */
+static int
+encode_readlink(struct nfs4_compound *cp, struct nfs4_readlink *readlink, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	int replen;
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_READLINK);
+	ADJUST_ARGS();
+
+	/* set up reply iovec
+	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
+	 *      + OP_READLINK + status  = 7
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 7 + XDR_QUADLEN(cp->taglen)) << 2;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->rl_pages, 0, readlink->rl_count);
+	
+	ENCODE_TAIL;
+}
+
+/*
+ * Append a REMOVE operation: the opcode followed by the name to remove,
+ * written as a length word and then remove->rm_namelen bytes of
+ * remove->rm_name.
+ */
+static int
+encode_remove(struct nfs4_compound *cp, struct nfs4_remove *remove)
+{
+	ENCODE_HEAD;
+
+	/* 8 = opcode word + length word */
+	RESERVE_SPACE(8 + remove->rm_namelen);
+	WRITE32(OP_REMOVE);
+	WRITE32(remove->rm_namelen);
+	WRITEMEM(remove->rm_name, remove->rm_namelen);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+/*
+ * Append a RENAME operation: the opcode, the old name and the new name.
+ * Each name is written as a length word followed by the name bytes.
+ *
+ * The space is reserved in two steps, one per name.  Only the first step
+ * includes the opcode word, so the second reservation is 4 bytes (the
+ * length word) plus the new name.
+ */
+static int
+encode_rename(struct nfs4_compound *cp, struct nfs4_rename *rename)
+{
+	ENCODE_HEAD;
+
+	/* opcode word + length word + old name */
+	RESERVE_SPACE(8 + rename->rn_oldnamelen);
+	WRITE32(OP_RENAME);
+	WRITE32(rename->rn_oldnamelen);
+	WRITEMEM(rename->rn_oldname, rename->rn_oldnamelen);
+	ADJUST_ARGS();
+
+	/* length word + new name; no opcode is written here */
+	RESERVE_SPACE(4 + rename->rn_newnamelen);
+	WRITE32(rename->rn_newnamelen);
+	WRITEMEM(rename->rn_newname, rename->rn_newnamelen);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+/*
+ * Append a RENEW operation: the opcode followed by the 64-bit client ID
+ * obtained through NFS4_CLIENTID(cp->server).
+ */
+static int
+encode_renew(struct nfs4_compound *cp)
+{
+	ENCODE_HEAD;
+
+	/* 12 = opcode (4) + clientid (8) */
+	RESERVE_SPACE(12);
+	WRITE32(OP_RENEW);
+	WRITE64(NFS4_CLIENTID(cp->server));
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+/*
+ * Append a RESTOREFH operation.  It carries no arguments, so only the
+ * opcode word is written.
+ */
+static int
+encode_restorefh(struct nfs4_compound *cp)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_RESTOREFH);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+/*
+ * Append a SAVEFH operation.  It carries no arguments, so only the
+ * opcode word is written.
+ */
+static int
+encode_savefh(struct nfs4_compound *cp)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_SAVEFH);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+/*
+ * Append a SETATTR operation: the opcode and the stateid from
+ * setattr->st_stateid, followed by the attributes in setattr->st_iap as
+ * produced by encode_attrs().
+ *
+ * Returns the nonzero status of encode_attrs() if that fails.
+ */
+static int
+encode_setattr(struct nfs4_compound *cp, struct nfs4_setattr *setattr)
+{
+	int status;
+	ENCODE_HEAD;
+	
+	/* 20 = opcode (4) + stateid; presumably the stateid is 16 bytes */
+        RESERVE_SPACE(20);
+        WRITE32(OP_SETATTR);
+	WRITEMEM(setattr->st_stateid, sizeof(nfs4_stateid));
+        ADJUST_ARGS();
+
+        if ((status = encode_attrs(cp, setattr->st_iap)))
+		return status;
+
+        ENCODE_TAIL;
+}
+
+/*
+ * Append a SETCLIENTID operation.  Written in order: the opcode, the
+ * verifier, the client name, the callback program number, the callback
+ * netid and universal address strings, and the callback ident.
+ *
+ * sc_name, sc_netid and sc_uaddr are measured with strlen(), so they must
+ * be NUL-terminated strings.
+ */
+static int
+encode_setclientid(struct nfs4_compound *cp, struct nfs4_setclientid *setclientid)
+{
+	u32 total_len;
+	u32 len1, len2, len3;
+	ENCODE_HEAD;
+
+	len1 = strlen(setclientid->sc_name);
+	len2 = strlen(setclientid->sc_netid);
+	len3 = strlen(setclientid->sc_uaddr);
+	/* the three strings, each rounded up to whole 32-bit words ... */
+	total_len = XDR_QUADLEN(len1) + XDR_QUADLEN(len2) + XDR_QUADLEN(len3);
+	/* ... converted to bytes, plus 32 bytes for the fixed-size fields:
+	 * opcode, verifier, three length words, sc_prog and sc_cb_ident
+	 * (presumably the verifier is 8 bytes) */
+	total_len = (total_len << 2) + 32;
+
+	RESERVE_SPACE(total_len);
+	WRITE32(OP_SETCLIENTID);
+	WRITEMEM(setclientid->sc_verifier, sizeof(nfs4_verifier));
+	WRITE32(len1);
+	WRITEMEM(setclientid->sc_name, len1);
+	WRITE32(setclientid->sc_prog);
+	WRITE32(len2);
+	WRITEMEM(setclientid->sc_netid, len2);
+	WRITE32(len3);
+	WRITEMEM(setclientid->sc_uaddr, len3);
+	WRITE32(setclientid->sc_cb_ident);
+	ADJUST_ARGS();
+
+	ENCODE_TAIL;
+}
+
+/*
+ * Append a SETCLIENTID_CONFIRM operation: the opcode followed by the
+ * client ID and confirmation verifier stored in cp->server->nfs4_state
+ * (the same fields decode_setclientid() fills in).
+ */
+static int
+encode_setclientid_confirm(struct nfs4_compound *cp)
+{
+        ENCODE_HEAD;
+
+	/* 12 = opcode (4) + clientid (8) */
+        RESERVE_SPACE(12 + sizeof(nfs4_verifier));
+        WRITE32(OP_SETCLIENTID_CONFIRM);
+        WRITE64(cp->server->nfs4_state->cl_clientid);
+        WRITEMEM(cp->server->nfs4_state->cl_confirm,sizeof(nfs4_verifier));
+        ADJUST_ARGS();
+
+        ENCODE_TAIL;
+}
+
+/*
+ * Append a WRITE operation (opcode, an all-ones stateid, 64-bit offset,
+ * stable_how and data length) and attach the pages holding the data to
+ * the request's send buffer.
+ */
+static int
+encode_write(struct nfs4_compound *cp, struct nfs4_write *write, struct rpc_rqst *req)
+{
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	ENCODE_HEAD;
+
+	/* 36 = opcode (4) + stateid (4 words) + offset (8) + stable_how (4)
+	 *      + length (4) */
+	RESERVE_SPACE(36);
+	WRITE32(OP_WRITE);
+	WRITE32(0xffffffff);     /* magic stateid -1 */
+	WRITE32(0xffffffff);
+	WRITE32(0xffffffff);
+	WRITE32(0xffffffff);
+	WRITE64(write->wr_offset);
+	WRITE32(write->wr_stable_how);
+	WRITE32(write->wr_len);
+	ADJUST_ARGS();
+
+	/* record the header length, then hand the data pages to the send buffer */
+	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+	xdr_encode_pages(sndbuf, write->wr_pages, write->wr_pgbase, write->wr_len);
+
+	ENCODE_TAIL;
+}
+
+/*
+ * Encode a whole COMPOUND request: the tag, the minor version and the
+ * operation count, followed by each of the cp->req_nops operations in
+ * cp->ops[], dispatched on its opnum to the matching encode_*() routine.
+ *
+ * Returns the first nonzero status reported by a per-operation encoder.
+ * An opnum without an encoder hits BUG().
+ */
+static int
+encode_compound(struct nfs4_compound *cp, struct rpc_rqst *req)
+{
+	int i, status = 0;
+	ENCODE_HEAD;
+
+	dprintk("encode_compound: tag=%.*s\n", (int)cp->taglen, cp->tag);
+	
+	/* 12 = tag length word + minor version word + operation count word */
+	RESERVE_SPACE(12 + cp->taglen);
+	WRITE32(cp->taglen);
+	WRITEMEM(cp->tag, cp->taglen);
+	WRITE32(NFS4_MINOR_VERSION);
+	WRITE32(cp->req_nops);
+	ADJUST_ARGS();
+
+	for (i = 0; i < cp->req_nops; i++) {
+		switch (cp->ops[i].opnum) {
+		case OP_ACCESS:
+			status = encode_access(cp, &cp->ops[i].u.access);
+			break;
+		case OP_CLOSE:
+			status = encode_close(cp, &cp->ops[i].u.close);
+			break;
+		case OP_COMMIT:
+			status = encode_commit(cp, &cp->ops[i].u.commit);
+			break;
+		case OP_CREATE:
+			status = encode_create(cp, &cp->ops[i].u.create);
+			break;
+		case OP_GETATTR:
+			status = encode_getattr(cp, &cp->ops[i].u.getattr);
+			break;
+		case OP_GETFH:
+			status = encode_getfh(cp);
+			break;
+		case OP_LINK:
+			status = encode_link(cp, &cp->ops[i].u.link);
+			break;
+		case OP_LOOKUP:
+			status = encode_lookup(cp, &cp->ops[i].u.lookup);
+			break;
+		case OP_OPEN:
+			status = encode_open(cp, &cp->ops[i].u.open);
+			break;
+		case OP_OPEN_CONFIRM:
+			status = encode_open_confirm(cp, &cp->ops[i].u.open_confirm);
+			break;
+		case OP_PUTFH:
+			status = encode_putfh(cp, &cp->ops[i].u.putfh);
+			break;
+		case OP_PUTROOTFH:
+			status = encode_putrootfh(cp);
+			break;
+		/* READ, READDIR, READLINK and WRITE also need the rpc_rqst,
+		 * because they set up its page buffers. */
+		case OP_READ:
+			status = encode_read(cp, &cp->ops[i].u.read, req);
+			break;
+		case OP_READDIR:
+			status = encode_readdir(cp, &cp->ops[i].u.readdir, req);
+			break;
+		case OP_READLINK:
+			status = encode_readlink(cp, &cp->ops[i].u.readlink, req);
+			break;
+		case OP_REMOVE:
+			status = encode_remove(cp, &cp->ops[i].u.remove);
+			break;
+		case OP_RENAME:
+			status = encode_rename(cp, &cp->ops[i].u.rename);
+			break;
+		case OP_RENEW:
+			status = encode_renew(cp);
+			break;
+		case OP_RESTOREFH:
+			status = encode_restorefh(cp);
+			break;
+		case OP_SAVEFH:
+			status = encode_savefh(cp);
+			break;
+		case OP_SETATTR:
+			status = encode_setattr(cp, &cp->ops[i].u.setattr);
+			break;
+		case OP_SETCLIENTID:
+			status = encode_setclientid(cp, &cp->ops[i].u.setclientid);
+			break;
+		case OP_SETCLIENTID_CONFIRM:
+			status = encode_setclientid_confirm(cp);
+			break;
+		case OP_WRITE:
+			status = encode_write(cp, &cp->ops[i].u.write, req);
+			break;
+		default:
+			BUG();
+		}
+		/* stop at the first operation that fails to encode */
+		if (status)
+			return status;
+	}
+	
+	ENCODE_TAIL;
+}
+/*
+ * END OF "GENERIC" ENCODE ROUTINES.
+ */
+
+
+/*
+ * Encode void argument
+ *
+ * Nothing is written; the send length is simply set from the position
+ * "p" passed in, via xdr_adjust_iovec().  "dummy" is unused.
+ */
+static int
+nfs4_xdr_enc_void(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode COMPOUND argument
+ *
+ * Points the compound's cursor (cp->p / cp->end) at the first send iovec,
+ * runs encode_compound() and stamps cp->timestamp with jiffies.  When the
+ * encoding succeeded and no pages were attached to the send buffer, the
+ * send length is set from the final cursor position.
+ *
+ * Returns the status of encode_compound().
+ */
+static int
+nfs4_xdr_enc_compound(struct rpc_rqst *req, u32 *p, struct nfs4_compound *cp)
+{
+	int err;
+
+	cp->p = p;
+	cp->end = (u32 *) ((char *)req->rq_svec[0].iov_base + req->rq_svec[0].iov_len);
+
+	err = encode_compound(cp, req);
+	cp->timestamp = jiffies;
+	if (err)
+		return err;
+
+	/* An operation that attached pages has already set the length. */
+	if (req->rq_snd_buf.page_len == 0)
+		req->rq_slen = xdr_adjust_iovec(req->rq_snd_buf.head, cp->p);
+	return 0;
+}
+
+
+/*
+ * START OF "GENERIC" DECODE ROUTINES.
+ *   These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+/*
+ * DECODE_HEAD declares the two locals every decode routine uses: the read
+ * cursor "p" and the return value "status".
+ *
+ * DECODE_TAIL supplies the common exit code.  Falling into it returns 0.
+ * "goto out" returns whatever is in status.  "goto xdr_error" logs a
+ * notice and returns -EIO; __FILE__/__LINE__ in that message refer to the
+ * place where DECODE_TAIL is expanded.
+ */
+#define DECODE_HEAD				\
+	u32 *p;					\
+	int status
+#define DECODE_TAIL				\
+	status = 0;				\
+out:						\
+	return status;				\
+xdr_error:					\
+	printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+	status = -EIO;				\
+	goto out
+
+/*
+ * Primitive readers.  All of them consume words at the local cursor "p"
+ * (see READ_BUF) and convert from network byte order with ntohl().
+ *
+ *   READ32(x)    - one word into x.
+ *   READ64(x)    - two words into x, first word in the upper 32 bits.
+ *   READTIME(x)  - skips one word, then combines the next two words the
+ *                  same way READ64 does.  NOTE(review): presumably the
+ *                  skipped word is the upper half of a 64-bit seconds
+ *                  field -- confirm against the wire format.
+ *   COPYMEM(x,n) - copies n bytes to x and advances p by n rounded up to
+ *                  whole words.
+ */
+#define READ32(x)         (x) = ntohl(*p++)
+#define READ64(x)         do {			\
+	(x) = (u64)ntohl(*p++) << 32;		\
+	(x) |= ntohl(*p++);			\
+} while (0)
+#define READTIME(x)       do {			\
+	p++;					\
+	(x) = (u64)ntohl(*p++) << 32;		\
+	(x) |= ntohl(*p++);			\
+} while (0)
+#define COPYMEM(x,nbytes) do {			\
+	memcpy((x), p, nbytes);			\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)
+
+/*
+ * Claim the next nbytes of the reply for decoding.  Jumps to xdr_error if
+ * fewer than nbytes remain between cp->p and cp->end; otherwise points the
+ * local cursor "p" at the claimed data and advances cp->p past it (rounded
+ * up to whole words).
+ *
+ * nbytes is evaluated twice, so it must not have side effects.
+ */
+#define READ_BUF(nbytes)  do {			\
+	if ((nbytes) > (u32)((char *)cp->end - (char *)cp->p))  \
+		goto xdr_error;			\
+	p = cp->p;				\
+	cp->p += XDR_QUADLEN(nbytes);		\
+} while (0)
+
+/*
+ * FIXME: The following dummy entry will be replaced once the userland
+ * upcall gets in...
+ *
+ * Placeholder owner-name translation: the name (p, len) is ignored and
+ * every owner is reported as uid -2.  Always returns 0.
+ */
+static int
+decode_uid(char *p, u32 len, uid_t *uid)
+{
+	*uid = (uid_t) -2;
+	return 0;
+}
+
+/*
+ * FIXME: The following dummy entry will be replaced once the userland
+ * upcall gets in...
+ *
+ * Placeholder group-name translation: the name (p, len) is ignored and
+ * every group is reported as gid -2.  Always returns 0.
+ */
+static int
+decode_gid(char *p, u32 len, gid_t *gid)
+{
+	*gid = (gid_t) -2;
+	return 0;
+}
+
+/*
+ * Decode a change_info structure: a 32-bit "atomic" flag followed by the
+ * 64-bit "before" and "after" values.
+ *
+ * Returns 0, or -EIO if the reply is too short.
+ */
+static int
+decode_change_info(struct nfs4_compound *cp, struct nfs4_change_info *cinfo)
+{
+	DECODE_HEAD;
+
+	/* atomic (4) + before (8) + after (8) */
+	READ_BUF(4 + 8 + 8);
+	READ32(cinfo->atomic);
+	READ64(cinfo->before);
+	READ64(cinfo->after);
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode the result of an ACCESS operation.
+ *
+ * Nothing is consumed when nfserr is nonzero.  Otherwise the "supported"
+ * and "access" masks are read and sanity-checked: the server may not
+ * report bits that were not requested, nor grant bits it does not claim
+ * to support.  On success both masks are stored through the pointers in
+ * *access.
+ *
+ * Returns 0, or -EIO on a short reply or bad bits.
+ */
+static int
+decode_access(struct nfs4_compound *cp, int nfserr, struct nfs4_access *access)
+{
+	u32 supported, granted;
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+
+	READ_BUF(8);
+	READ32(supported);
+	READ32(granted);
+
+	if ((supported & ~access->ac_req_access) || (granted & ~supported)) {
+		printk(KERN_NOTICE "NFS: server returned bad bits in access call!\n");
+		status = -EIO;
+		goto out;
+	}
+	*access->ac_resp_supported = supported;
+	*access->ac_resp_access = granted;
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode the result of a CLOSE operation: on success the reply carries a
+ * stateid, which is copied into close->cl_stateid.  Nothing is consumed
+ * when nfserr is nonzero.
+ *
+ * Returns 0, or -EIO if the reply is too short.
+ */
+static int
+decode_close(struct nfs4_compound *cp, int nfserr, struct nfs4_close *close)
+{
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+
+	READ_BUF(sizeof(nfs4_stateid));
+	COPYMEM(close->cl_stateid, sizeof(nfs4_stateid));
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode the result of a COMMIT operation: on success the reply carries
+ * an 8-byte write verifier, which is copied into
+ * commit->co_verifier->verifier.  Nothing is consumed when nfserr is
+ * nonzero.
+ *
+ * Returns 0, or -EIO if the reply is too short.
+ */
+static int
+decode_commit(struct nfs4_compound *cp, int nfserr, struct nfs4_commit *commit)
+{
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+
+	READ_BUF(8);
+	COPYMEM(commit->co_verifier->verifier, 8);
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode the result of a CREATE operation: the change_info for the
+ * directory (stored in create->cr_cinfo) followed by an attribute bitmap
+ * of at most two words, which is skipped.  Nothing is consumed when
+ * nfserr is nonzero.
+ *
+ * Returns 0, the status of decode_change_info(), or -EIO on a short or
+ * malformed reply.
+ */
+static int
+decode_create(struct nfs4_compound *cp, int nfserr, struct nfs4_create *create)
+{
+	u32 nwords;
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+
+	status = decode_change_info(cp, create->cr_cinfo);
+	if (status)
+		goto out;
+
+	READ_BUF(4);
+	READ32(nwords);
+	if (nwords > 2)
+		goto xdr_error;
+	/* step over the bitmap words without looking at them */
+	READ_BUF(nwords << 2);
+
+	DECODE_TAIL;
+}
+
+extern u32 nfs4_fattr_bitmap[2];
+extern u32 nfs4_fsinfo_bitmap[2];
+extern u32 nfs4_fsstat_bitmap[2];
+extern u32 nfs4_pathconf_bitmap[2];
+
+/*
+ * Decode the result of a GETATTR operation.
+ *
+ * If nfserr is nonzero nothing is consumed and 0 is returned.  Otherwise
+ * the returned attribute bitmap (at most two words) is read and checked
+ * against the bitmap that was requested (getattr->gt_bmval); a bit that
+ * was not requested is treated as an XDR error.  The accepted bitmap is
+ * stored in getattr->gt_bmres, and each attribute present is decoded, in
+ * the order tested below, into the nfs_fattr, nfs_fsstat, nfs_fsinfo or
+ * nfs_pathconf structure referenced by getattr.
+ *
+ * "len" accumulates the number of bytes consumed by the attribute values
+ * and must equal the attrlen announced by the server.
+ *
+ * Returns 0 on success, -EIO on a short or malformed reply, or the
+ * nonzero status of decode_uid()/decode_gid().
+ *
+ * NOTE(review): only gt_attrs and gt_fsinfo are NULL-checked (for the
+ * defaults below); the per-attribute code dereferences nfp, fsstat,
+ * fsinfo and pathconf unconditionally, so it presumably relies on callers
+ * requesting an attribute only when the matching destination is set --
+ * confirm against the callers.
+ */
+static int
+decode_getattr(struct nfs4_compound *cp, int nfserr, struct nfs4_getattr *getattr)
+{
+        struct nfs_fattr *nfp = getattr->gt_attrs;
+	struct nfs_fsstat *fsstat = getattr->gt_fsstat;
+	struct nfs_fsinfo *fsinfo = getattr->gt_fsinfo;
+	struct nfs_pathconf *pathconf = getattr->gt_pathconf;
+        u32 bmlen;
+        u32 bmval0 = 0;
+        u32 bmval1 = 0;
+        u32 attrlen;
+        u32 dummy32;
+        u32 len = 0;
+	unsigned int type;
+	int fmode = 0;
+        DECODE_HEAD;
+	
+        if (nfserr)
+                goto success;
+        
+        READ_BUF(4);
+        READ32(bmlen);
+        if (bmlen > 2)
+                goto xdr_error;
+	
+	/* the bitmap words plus the attrlen word that follows them */
+        READ_BUF((bmlen << 2) + 4);
+        if (bmlen > 0)
+                READ32(bmval0);
+        if (bmlen > 1)
+                READ32(bmval1);
+        READ32(attrlen);
+
+	/* reject any attribute bit that was not asked for */
+	if ((bmval0 & ~getattr->gt_bmval[0]) ||
+	    (bmval1 & ~getattr->gt_bmval[1])) {
+		dprintk("read_attrs: server returned bad attributes!\n");
+		goto xdr_error;
+	}
+	getattr->gt_bmres[0] = bmval0;
+	getattr->gt_bmres[1] = bmval1;
+
+	/*
+	 * In case the server doesn't return some attributes,
+	 * we initialize them here to some nominal values..
+	 */
+	if (nfp) {
+		nfp->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
+		nfp->nlink = 1;
+		nfp->timestamp = jiffies;
+	}
+	if (fsinfo) {
+		fsinfo->rtmult = fsinfo->wtmult = 512;  /* ??? */
+		fsinfo->lease_time = 60;
+	}
+
+	/* attributes selected by the first bitmap word */
+        if (bmval0 & FATTR4_WORD0_TYPE) {
+                READ_BUF(4);
+                len += 4;
+                READ32(type);
+                if (type < NF4REG || type > NF4NAMEDATTR) {
+                        dprintk("read_attrs: bad type %d\n", type);
+                        goto xdr_error;
+                }
+		nfp->type = nfs_type2fmt[type].nfs2type;
+		/* remembered so it can be merged into the mode below */
+		fmode = nfs_type2fmt[type].mode;
+                dprintk("read_attrs: type=%d\n", (u32)nfp->type);
+        }
+        if (bmval0 & FATTR4_WORD0_CHANGE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->change_attr);
+                dprintk("read_attrs: changeid=%Ld\n", (u64)nfp->change_attr);
+        }
+        if (bmval0 & FATTR4_WORD0_SIZE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->size);
+                dprintk("read_attrs: size=%Ld\n", (u64)nfp->size);
+        }
+        if (bmval0 & FATTR4_WORD0_FSID) {
+                READ_BUF(16);
+                len += 16;
+                READ64(nfp->fsid_u.nfs4.major);
+                READ64(nfp->fsid_u.nfs4.minor);
+                dprintk("read_attrs: fsid=0x%Lx/0x%Lx\n",
+			nfp->fsid_u.nfs4.major, nfp->fsid_u.nfs4.minor);
+        }
+        if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
+                READ_BUF(4);
+                len += 4;
+                READ32(fsinfo->lease_time);
+                dprintk("read_attrs: lease_time=%d\n", fsinfo->lease_time);
+        }
+        if (bmval0 & FATTR4_WORD0_FILEID) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->fileid);
+                dprintk("read_attrs: fileid=%Ld\n", nfp->fileid);
+        }
+	if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
+		READ_BUF(8);
+		len += 8;
+		READ64(fsstat->afiles);
+		dprintk("read_attrs: files_avail=0x%Lx\n", fsstat->afiles);
+	}
+        if (bmval0 & FATTR4_WORD0_FILES_FREE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->ffiles);
+                dprintk("read_attrs: files_free=0x%Lx\n", fsstat->ffiles);
+        }
+        if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->tfiles);
+                dprintk("read_attrs: files_tot=0x%Lx\n", fsstat->tfiles);
+        }
+        if (bmval0 & FATTR4_WORD0_MAXFILESIZE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsinfo->maxfilesize);
+                dprintk("read_attrs: maxfilesize=0x%Lx\n", fsinfo->maxfilesize);
+        }
+	if (bmval0 & FATTR4_WORD0_MAXLINK) {
+		READ_BUF(4);
+		len += 4;
+		READ32(pathconf->max_link);
+		dprintk("read_attrs: maxlink=%d\n", pathconf->max_link);
+	}
+        if (bmval0 & FATTR4_WORD0_MAXNAME) {
+                READ_BUF(4);
+                len += 4;
+                READ32(pathconf->max_namelen);
+                dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen);
+        }
+        if (bmval0 & FATTR4_WORD0_MAXREAD) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsinfo->rtmax);
+		/* the preferred read and readdir sizes follow the maximum */
+		fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax;
+                dprintk("read_attrs: maxread=%d\n", fsinfo->rtmax);
+        }
+        if (bmval0 & FATTR4_WORD0_MAXWRITE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsinfo->wtmax);
+		/* the preferred write size follows the maximum */
+		fsinfo->wtpref = fsinfo->wtmax;
+                dprintk("read_attrs: maxwrite=%d\n", fsinfo->wtmax);
+        }
+	
+	/* attributes selected by the second bitmap word */
+        if (bmval1 & FATTR4_WORD1_MODE) {
+                READ_BUF(4);
+                len += 4;
+                READ32(dummy32);
+		/* permission bits from the server, file type bits from the
+		 * TYPE attribute decoded above (0 if TYPE was absent) */
+		nfp->mode = (dummy32 & ~S_IFMT) | fmode;
+                dprintk("read_attrs: mode=0%o\n", nfp->mode);
+        }
+        if (bmval1 & FATTR4_WORD1_NUMLINKS) {
+                READ_BUF(4);
+                len += 4;
+                READ32(nfp->nlink);
+                dprintk("read_attrs: nlinks=0%o\n", nfp->nlink);
+        }
+        if (bmval1 & FATTR4_WORD1_OWNER) {
+                READ_BUF(4);
+                len += 4;
+                READ32(dummy32);    /* name length */
+                if (dummy32 > XDR_MAX_NETOBJ) {
+			dprintk("read_attrs: name too long!\n");
+                        goto xdr_error;
+                }
+                READ_BUF(dummy32);
+		/* the name occupies a whole number of words on the wire */
+                len += (XDR_QUADLEN(dummy32) << 2);
+                if ((status = decode_uid((char *)p, dummy32, &nfp->uid))) {
+                        dprintk("read_attrs: gss_get_num failed!\n");
+                        goto out;
+                }
+                dprintk("read_attrs: uid=%d\n", (int)nfp->uid);
+        }
+        if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
+                READ_BUF(4);
+                len += 4;
+                READ32(dummy32);    /* name length */
+                if (dummy32 > XDR_MAX_NETOBJ) {
+                        dprintk("read_attrs: name too long!\n");
+                        goto xdr_error;
+                }
+                READ_BUF(dummy32);
+                len += (XDR_QUADLEN(dummy32) << 2);
+                if ((status = decode_gid((char *)p, dummy32, &nfp->gid))) {
+                        dprintk("read_attrs: gss_get_num failed!\n");
+                        goto out;
+                }
+                dprintk("read_attrs: gid=%d\n", (int)nfp->gid);
+        }
+        if (bmval1 & FATTR4_WORD1_RAWDEV) {
+                READ_BUF(8);
+                len += 8;
+		/* first word goes above MINORBITS, second word is masked
+		 * with MINORMASK */
+                READ32(dummy32);
+		nfp->rdev = (dummy32 << MINORBITS);
+                READ32(dummy32);
+		nfp->rdev |= (dummy32 & MINORMASK);
+                dprintk("read_attrs: rdev=%d\n", nfp->rdev);
+        }
+        if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->abytes);
+                dprintk("read_attrs: savail=0x%Lx\n", fsstat->abytes);
+        }
+	if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->fbytes);
+                dprintk("read_attrs: sfree=0x%Lx\n", fsstat->fbytes);
+        }
+        if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->tbytes);
+                dprintk("read_attrs: stotal=0x%Lx\n", fsstat->tbytes);
+        }
+        if (bmval1 & FATTR4_WORD1_SPACE_USED) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->du.nfs3.used);
+                dprintk("read_attrs: sused=0x%Lx\n", nfp->du.nfs3.used);
+        }
+	/* each time value is three words on the wire; see READTIME */
+        if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
+                READ_BUF(12);
+                len += 12;
+                READTIME(nfp->atime);
+                dprintk("read_attrs: atime=%d\n", (int)nfp->atime);
+        }
+        if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
+                READ_BUF(12);
+                len += 12;
+                READTIME(nfp->ctime);
+                dprintk("read_attrs: ctime=%d\n", (int)nfp->ctime);
+        }
+        if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
+                READ_BUF(12);
+                len += 12;
+                READTIME(nfp->mtime);
+                dprintk("read_attrs: mtime=%d\n", (int)nfp->mtime);
+        }
+	/* what was consumed must match what the server announced */
+        if (len != attrlen)
+                goto xdr_error;
+	
+success:
+        DECODE_TAIL;
+}
+
+/*
+ * Decode the result of a GETFH operation into getfh->gf_fhandle.
+ *
+ * The handle is zeroed first, whether or not the operation succeeded.
+ * When nfserr is zero the length word is read, checked against
+ * NFS_MAXFHSIZE, and that many bytes are copied into the handle.
+ *
+ * The length is kept unsigned so that a value with the top bit set is
+ * rejected by the size check instead of being stored in fh->size.
+ *
+ * Returns 0, or -EIO on a short reply or an oversized handle.
+ */
+static int
+decode_getfh(struct nfs4_compound *cp, int nfserr, struct nfs4_getfh *getfh)
+{
+	struct nfs_fh *fh = getfh->gf_fhandle;
+	u32 len;
+	DECODE_HEAD;
+
+	/* Zero handle first to allow comparisons */
+	memset(fh, 0, sizeof(*fh));
+
+	if (!nfserr) {
+		READ_BUF(4);
+		READ32(len);
+		if (len > NFS_MAXFHSIZE)
+			goto xdr_error;
+		fh->size = len;
+		READ_BUF(len);
+		COPYMEM(fh->data, len);
+	}
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode the result of a LINK operation: on success the reply carries a
+ * change_info, stored in link->ln_cinfo.  Nothing is consumed when nfserr
+ * is nonzero.
+ *
+ * Returns 0 or the status of decode_change_info().
+ */
+static int
+decode_link(struct nfs4_compound *cp, int nfserr, struct nfs4_link *link)
+{
+	if (nfserr)
+		return 0;
+
+	return decode_change_info(cp, link->ln_cinfo);
+}
+
+/*
+ * Decode the result of an OPEN operation.
+ *
+ * Nothing is consumed when nfserr is nonzero.  Otherwise the reply is
+ * read in order: the stateid (copied to open->op_stateid), the directory
+ * change_info (open->op_cinfo), the result flags (*open->op_rflags), an
+ * attribute bitmap of at most 10 words which is skipped, and the
+ * delegation type.  Any delegation type other than
+ * NFS4_OPEN_DELEGATE_NONE is rejected.
+ *
+ * Returns 0, the status of decode_change_info(), or -EIO on a short or
+ * unexpected reply.
+ */
+static int
+decode_open(struct nfs4_compound *cp, int nfserr, struct nfs4_open *open)
+{
+	u32 bmlen, delegation_type;
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		READ_BUF(sizeof(nfs4_stateid));
+		COPYMEM(open->op_stateid, sizeof(nfs4_stateid));
+
+		/*
+		 * If the change_info cannot be read, cp->p has not moved
+		 * past it; carrying on would decode the remaining fields
+		 * from the wrong offset.
+		 */
+		if ((status = decode_change_info(cp, open->op_cinfo)))
+			goto out;
+
+		READ_BUF(8);
+		READ32(*open->op_rflags);
+		READ32(bmlen);
+		if (bmlen > 10)
+			goto xdr_error;
+
+		/* the bitmap words plus the delegation type word */
+		READ_BUF((bmlen << 2) + 4);
+		p += bmlen;	/* the bitmap contents are not examined */
+		READ32(delegation_type);
+		if (delegation_type != NFS4_OPEN_DELEGATE_NONE)
+			goto xdr_error;
+	}
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode the result of an OPEN_CONFIRM operation: on success the reply
+ * carries a stateid, which is copied into open_confirm->oc_stateid.
+ * Nothing is consumed when nfserr is nonzero.
+ *
+ * Returns 0, or -EIO if the reply is too short.
+ */
+static int
+decode_open_confirm(struct nfs4_compound *cp, int nfserr, struct nfs4_open_confirm *open_confirm)
+{
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+
+	READ_BUF(sizeof(nfs4_stateid));
+	COPYMEM(open_confirm->oc_stateid, sizeof(nfs4_stateid));
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode the header of a READ result: the eof flag and the number of
+ * bytes returned.  The eof flag is stored only if the caller supplied
+ * read->rd_eof; the byte count is stored in *read->rd_bytes_read and must
+ * not exceed the length that was requested.  Nothing is consumed when
+ * nfserr is nonzero.
+ *
+ * Returns 0, or -EIO on a short reply or an oversized byte count.
+ */
+static int
+decode_read(struct nfs4_compound *cp, int nfserr, struct nfs4_read *read)
+{
+	u32 eof;
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+
+	READ_BUF(8);
+	READ32(eof);
+	if (read->rd_eof)
+		*read->rd_eof = eof;
+	READ32(*read->rd_bytes_read);
+	if (*read->rd_bytes_read > read->rd_length)
+		goto xdr_error;
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode the result of a READDIR operation.
+ *
+ * Nothing is consumed when nfserr is nonzero.  Otherwise the 8-byte
+ * verifier is copied to readdir->rd_resp_verifier, and the entries, which
+ * were received directly into the first page of the receive buffer, are
+ * walked once to validate them.  For each entry the name length and the
+ * attribute bitmap length are bounded, and the only attribute accepted is
+ * a bitmap whose first word is exactly FATTR4_WORD0_FILEID (8 bytes of
+ * attribute data) or an empty one.
+ *
+ * If an entry runs past the end of the received data the listing is
+ * truncated at that entry and 0 is returned.  A malformed entry yields
+ * -errno_NFSERR_IO.  A short verifier yields -EIO.
+ */
+static int
+decode_readdir(struct nfs4_compound *cp, int nfserr, struct rpc_rqst *req, struct nfs4_readdir *readdir)
+{
+	struct xdr_buf	*rcvbuf = &req->rq_rcv_buf;
+	struct page	*page = *rcvbuf->pages;
+	unsigned int	pglen = rcvbuf->page_len;
+	u32		*end, *entry;
+	u32		len, attrlen, word;
+	int 		i;
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		READ_BUF(8);
+		COPYMEM(readdir->rd_resp_verifier, 8);
+
+		BUG_ON(pglen > PAGE_CACHE_SIZE);
+		/* from here on p walks the page, not the reply header */
+		p   = (u32 *) kmap(page);
+		end = (u32 *) ((char *)p + pglen + readdir->rd_pgbase);
+
+		/* each entry is preceded by a nonzero "value follows" word */
+		while (*p++) {
+			entry = p - 1;
+			if (p + 3 > end)
+				goto short_pkt;
+			p += 2;     /* cookie */
+			len = ntohl(*p++);  /* filename length */
+			if (len > NFS4_MAXNAMLEN) {
+				printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
+				goto err_unmap;
+			}
+
+			p += XDR_QUADLEN(len);
+			if (p + 1 > end)
+				goto short_pkt;
+			len = ntohl(*p++);  /* bitmap length */
+			if (len > 10) {
+				printk(KERN_WARNING "NFS: giant bitmap in readdir (len 0x%x)\n", len);
+				goto err_unmap;
+			}
+			/* the bitmap words plus the attrlen word */
+			if (p + len + 1 > end)
+				goto short_pkt;
+			attrlen = 0;
+			for (i = 0; i < len; i++) {
+				word = ntohl(*p++);
+				if (!word)
+					continue;
+				else if (i == 0 && word == FATTR4_WORD0_FILEID) {
+					attrlen = 8;
+					continue;
+				}
+				printk(KERN_WARNING "NFS: unexpected bitmap word in readdir (0x%x)\n", word);
+				goto err_unmap;
+			}
+			if (ntohl(*p++) != attrlen) {
+				printk(KERN_WARNING "NFS: unexpected attrlen in readdir\n");
+				goto err_unmap;
+			}
+			p += XDR_QUADLEN(attrlen);
+			if (p + 1 > end)
+				goto short_pkt;
+		}
+		kunmap(page);
+	}
+
+	DECODE_TAIL;
+short_pkt:
+	printk(KERN_NOTICE "NFS: short packet in readdir reply!\n");
+	/*
+	 * Truncate the listing at the incomplete entry.  "entry" points
+	 * into the mapped page, so this has to happen before kunmap().
+	 */
+	entry[0] = entry[1] = 0;
+	kunmap(page);
+	return 0;
+err_unmap:
+	kunmap(page);
+	return -errno_NFSERR_IO;
+}
+
+/*
+ * Decode the result of a READLINK operation.
+ *
+ * The encode side arranged for the reply (a length word followed by the
+ * link text) to be received straight into the first page of the receive
+ * buffer.  All that is left to do here is to bound the length, rewrite
+ * the length word in host byte order, and NUL-terminate the text, which
+ * the VFS expects.  Nothing is touched when nfserr is nonzero.
+ *
+ * Returns 0, or -EIO if the server reports a link too long for the page.
+ */
+static int
+decode_readlink(struct nfs4_compound *cp, int nfserr, struct rpc_rqst *req, struct nfs4_readlink *readlink)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	u32 *lenp;
+	u32 len;
+	char *text;
+
+	if (nfserr)
+		return 0;
+
+	lenp = (u32 *) kmap(rcvbuf->pages[0]);
+	len = ntohl(*lenp);
+	/* leave room for the length word and the terminating NUL */
+	if (len > PAGE_CACHE_SIZE - 5) {
+		printk(KERN_WARNING "nfs: server returned giant symlink!\n");
+		kunmap(rcvbuf->pages[0]);
+		return -EIO;
+	}
+	*lenp = len;
+
+	text = (char *)(lenp + 1);
+	text[len] = '\0';
+	kunmap(rcvbuf->pages[0]);
+	return 0;
+}
+
+/*
+ * Decode the result of a REMOVE operation: on success the reply carries a
+ * change_info, stored in remove->rm_cinfo.  Nothing is consumed when
+ * nfserr is nonzero.
+ *
+ * Returns 0 or the status of decode_change_info().
+ */
+static int
+decode_remove(struct nfs4_compound *cp, int nfserr, struct nfs4_remove *remove)
+{
+	if (nfserr)
+		return 0;
+
+	return decode_change_info(cp, remove->rm_cinfo);
+}
+
+/*
+ * Decode the result of a RENAME operation: on success the reply carries
+ * two change_info structures, stored in rename->rn_src_cinfo and
+ * rename->rn_dst_cinfo in that order.  Nothing is consumed when nfserr is
+ * nonzero.
+ *
+ * Returns 0 or the first nonzero status of decode_change_info().
+ */
+static int
+decode_rename(struct nfs4_compound *cp, int nfserr, struct nfs4_rename *rename)
+{
+	int err;
+
+	if (nfserr)
+		return 0;
+
+	err = decode_change_info(cp, rename->rn_src_cinfo);
+	if (!err)
+		err = decode_change_info(cp, rename->rn_dst_cinfo);
+	return err;
+}
+
+/*
+ * Decode the result of a SETATTR operation: an attribute bitmap of at
+ * most 10 words, which is skipped without being examined.  Unlike the
+ * other decoders this one takes no nfserr argument and always consumes
+ * the bitmap.
+ *
+ * Returns 0, or -EIO on a short reply or an oversized bitmap.
+ */
+static int
+decode_setattr(struct nfs4_compound *cp)
+{
+	u32 nwords;
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(nwords);
+	if (nwords > 10)
+		goto xdr_error;
+	/* step over the bitmap words */
+	READ_BUF(nwords << 2);
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode the result of a SETCLIENTID operation.
+ *
+ * On success the client ID and the confirmation verifier are stored in
+ * cp->server->nfs4_state.  If the server answered NFSERR_CLID_INUSE the
+ * reply carries two strings (netid and uaddr), which are skipped.  For
+ * any other error nothing is consumed.
+ *
+ * Returns 0, or -EIO if the reply is too short.
+ */
+static int
+decode_setclientid(struct nfs4_compound *cp, int nfserr)
+{
+	u32 slen;
+	DECODE_HEAD;
+
+	if (!nfserr) {
+		/* clientid (8) followed by the verifier */
+		READ_BUF(8 + sizeof(nfs4_verifier));
+		READ64(cp->server->nfs4_state->cl_clientid);
+		COPYMEM(cp->server->nfs4_state->cl_confirm, sizeof(nfs4_verifier));
+	}
+	else if (nfserr == NFSERR_CLID_INUSE) {
+		/* skip netid string */
+		READ_BUF(4);
+		READ32(slen);
+		READ_BUF(slen);
+
+		/* skip uaddr string */
+		READ_BUF(4);
+		READ32(slen);
+		READ_BUF(slen);
+	}
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode the result of a WRITE operation: the number of bytes written
+ * (stored in *write->wr_bytes_written, and required not to exceed the
+ * length that was sent), the "committed" level and the 8-byte write
+ * verifier (both stored in write->wr_verf).  Nothing is consumed when
+ * nfserr is nonzero.
+ *
+ * Returns 0, or -EIO on a short reply or an oversized byte count.
+ */
+static int
+decode_write(struct nfs4_compound *cp, int nfserr, struct nfs4_write *write)
+{
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+
+	/* count (4) + committed (4) + verifier (8) */
+	READ_BUF(4 + 4 + 8);
+	READ32(*write->wr_bytes_written);
+	if (*write->wr_bytes_written > write->wr_len)
+		goto xdr_error;
+	READ32(write->wr_verf->committed);
+	COPYMEM(write->wr_verf->verifier, 8);
+
+	DECODE_TAIL;
+}
+
+/*
+ * Decode a whole COMPOUND reply.
+ *
+ * Reads the top-level status, skips the tag, reads the number of results
+ * (which may not exceed the number of operations sent) and then decodes
+ * each result in turn.  Every result must carry the opnum of the
+ * operation sent at the same position.  Only the last result may carry a
+ * nonzero status, and that status must equal the top-level status.  Each
+ * status is recorded in cp->ops[].nfserr before the matching decode_*()
+ * routine is called.
+ *
+ * Returns 0 on success and -EIO on any short, inconsistent or
+ * undecodable reply (a nonzero status from a per-operation decoder is
+ * reported as -EIO as well).
+ */
+static int
+decode_compound(struct nfs4_compound *cp, struct rpc_rqst *req)
+{
+	u32 taglen;
+	u32 opnum, nfserr;
+	DECODE_HEAD;
+
+	READ_BUF(8);
+	READ32(cp->toplevel_status);
+	READ32(taglen);
+
+	/*
+	 * We need this if our zero-copy I/O is going to work.  Rumor has
+	 * it that the spec will soon mandate it...
+	 */
+	if (taglen != cp->taglen)
+		dprintk("nfs4: non-conforming server returns tag length mismatch!\n");
+
+	/*
+	 * taglen comes from the server.  Claim the tag and the following
+	 * count word separately: a combined "taglen + 4" could wrap around
+	 * for a huge taglen and slip past the bounds check.
+	 */
+	READ_BUF(taglen);	/* the tag itself is not examined */
+	READ_BUF(4);
+	READ32(cp->resp_nops);
+	if (cp->resp_nops > cp->req_nops) {
+		dprintk("nfs4: resp_nops > req_nops!\n");
+		goto xdr_error;
+	}
+
+	for (cp->nops = 0; cp->nops < cp->resp_nops; cp->nops++) {
+		READ_BUF(8);
+		READ32(opnum);
+		if (opnum != cp->ops[cp->nops].opnum) {
+			dprintk("nfs4: operation mismatch!\n");
+			goto xdr_error;
+		}
+		READ32(nfserr);
+		if (cp->nops == cp->resp_nops - 1) {
+			/* the last result determines the overall status */
+			if (nfserr != cp->toplevel_status) {
+				dprintk("nfs4: status mismatch!\n");
+				goto xdr_error;
+			}
+		}
+		else if (nfserr) {
+			dprintk("nfs4: intermediate status nonzero!\n");
+			goto xdr_error;
+		}
+		cp->ops[cp->nops].nfserr = nfserr;
+
+		/* operations with "status = 0" have nothing to decode here */
+		switch (opnum) {
+		case OP_ACCESS:
+			status = decode_access(cp, nfserr, &cp->ops[cp->nops].u.access);
+			break;
+		case OP_CLOSE:
+			status = decode_close(cp, nfserr, &cp->ops[cp->nops].u.close);
+			break;
+		case OP_COMMIT:
+			status = decode_commit(cp, nfserr, &cp->ops[cp->nops].u.commit);
+			break;
+		case OP_CREATE:
+			status = decode_create(cp, nfserr, &cp->ops[cp->nops].u.create);
+			break;
+		case OP_GETATTR:
+			status = decode_getattr(cp, nfserr, &cp->ops[cp->nops].u.getattr);
+			break;
+		case OP_GETFH:
+			status = decode_getfh(cp, nfserr, &cp->ops[cp->nops].u.getfh);
+			break;
+		case OP_LINK:
+			status = decode_link(cp, nfserr, &cp->ops[cp->nops].u.link);
+			break;
+		case OP_LOOKUP:
+			status = 0;
+			break;
+		case OP_OPEN:
+			status = decode_open(cp, nfserr, &cp->ops[cp->nops].u.open);
+			break;
+		case OP_OPEN_CONFIRM:
+			status = decode_open_confirm(cp, nfserr, &cp->ops[cp->nops].u.open_confirm);
+			break;
+		case OP_PUTFH:
+			status = 0;
+			break;
+		case OP_PUTROOTFH:
+			status = 0;
+			break;
+		case OP_READ:
+			status = decode_read(cp, nfserr, &cp->ops[cp->nops].u.read);
+			break;
+		case OP_READDIR:
+			status = decode_readdir(cp, nfserr, req, &cp->ops[cp->nops].u.readdir);
+			break;
+		case OP_READLINK:
+			status = decode_readlink(cp, nfserr, req, &cp->ops[cp->nops].u.readlink);
+			break;
+		case OP_RESTOREFH:
+			status = 0;
+			break;
+		case OP_REMOVE:
+			status = decode_remove(cp, nfserr, &cp->ops[cp->nops].u.remove);
+			break;
+		case OP_RENAME:
+			status = decode_rename(cp, nfserr, &cp->ops[cp->nops].u.rename);
+			break;
+		case OP_RENEW:
+			status = 0;
+			break;
+		case OP_SAVEFH:
+			status = 0;
+			break;
+		case OP_SETATTR:
+			status = decode_setattr(cp);
+			break;
+		case OP_SETCLIENTID:
+			status = decode_setclientid(cp, nfserr);
+			break;
+		case OP_SETCLIENTID_CONFIRM:
+			status = 0;
+			break;
+		case OP_WRITE:
+			status = decode_write(cp, nfserr, &cp->ops[cp->nops].u.write);
+			break;
+		default:
+			/*
+			 * opnum was checked against the request above, so
+			 * this means an operation was sent that has no
+			 * decoder.
+			 */
+			BUG();
+			return -EIO;
+		}
+		if (status)
+			goto xdr_error;
+	}
+
+	DECODE_TAIL;
+}
+/*
+ * END OF "GENERIC" DECODE ROUTINES.
+ */
+
+/*
+ * Decode void reply
+ *
+ * There is nothing to decode; none of the arguments is used and the
+ * result is always 0.
+ */
+static int
+nfs4_xdr_dec_void(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	return 0;
+}
+
+/*
+ * Decode COMPOUND response
+ *
+ * Points the compound's cursor (cp->p / cp->end) at the first receive
+ * iovec and runs decode_compound().  If decoding fails its status is
+ * returned.  Otherwise a nonzero top-level NFS status is translated with
+ * nfs_stat_to_errno() and returned negated, and a zero status yields 0.
+ */
+static int
+nfs4_xdr_dec_compound(struct rpc_rqst *rqstp, u32 *p, struct nfs4_compound *cp)
+{
+	int err;
+
+	cp->p = p;
+	cp->end = (u32 *) ((u8 *) rqstp->rq_rvec->iov_base + rqstp->rq_rvec->iov_len);
+
+	err = decode_compound(cp, rqstp);
+	if (err)
+		return err;
+
+	if (cp->toplevel_status)
+		return -nfs_stat_to_errno(cp->toplevel_status);
+	return 0;
+}
+
+/*
+ * Decode one directory entry at p into *entry.
+ *
+ * If the leading "value follows" word is zero there is no entry: when the
+ * word after it is zero too, ERR_PTR(-EAGAIN) is returned; otherwise
+ * entry->eof is set and ERR_PTR(-EBADCOOKIE) is returned.
+ *
+ * Otherwise the previous cookie is saved, the new cookie (clamped to
+ * COOKIE_MAX), the name length and a pointer to the name are stored, the
+ * attribute bitmap is skipped, and if any attribute data is present it is
+ * read as the 64-bit inode number.  entry->eof is set when this entry is
+ * followed by a zero "value follows" word and a nonzero word after it.
+ *
+ * Returns the position just past the entry.  No bounds checking is done
+ * here.  "plus" is unused.
+ */
+u32 *
+nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+{
+	u32 bmlen, attrlen;
+
+	if (*p++ == 0) {
+		if (*p == 0)
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;
+		return ERR_PTR(-EBADCOOKIE);
+	}
+
+	entry->prev_cookie = entry->cookie;
+	p = xdr_decode_hyper(p, &entry->cookie);
+	if (entry->cookie > COOKIE_MAX)
+		entry->cookie = COOKIE_MAX;
+
+	entry->len = ntohl(*p++);
+	entry->name = (const char *) p;
+	p += XDR_QUADLEN(entry->len);
+
+	/*
+	 * In case the server doesn't return an inode number,
+	 * we fake one here.  (We don't use inode number 0,
+	 * since glibc seems to choke on it...)
+	 */
+	entry->ino = 1;
+
+	bmlen = ntohl(*p++);           /* bitmap length */
+	p += bmlen;
+	attrlen = ntohl(*p++);         /* attribute buffer length */
+	if (attrlen != 0)
+		p = xdr_decode_hyper(p, &entry->ino);
+
+	entry->eof = !p[0] && p[1];
+	return p;
+}
+
+/*
+ * MAX evaluates both arguments, and the larger one twice, so it must only
+ * be given side-effect-free expressions.
+ */
+#ifndef MAX
+# define MAX(a, b)	(((a) > (b))? (a) : (b))
+#endif
+
+/*
+ * Build one rpc_procinfo initializer.  In order: the name "nfs4_<proc>",
+ * the encode routine nfs4_xdr_<argtype>, the decode routine
+ * nfs4_xdr_<restype>, the larger of NFS4_<argtype>_sz and
+ * NFS4_<restype>_sz shifted left by two, and a trailing 0.
+ *
+ * NOTE(review): the NFS4_*_sz constants are defined outside this excerpt;
+ * presumably they are sizes in 32-bit words and the shift converts them
+ * to bytes -- confirm.
+ */
+#define PROC(proc, argtype, restype)				\
+    { "nfs4_" #proc,						\
+      (kxdrproc_t) nfs4_xdr_##argtype,				\
+      (kxdrproc_t) nfs4_xdr_##restype,				\
+      MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,	\
+      0							\
+    }
+
+/*
+ * Procedure table: a "null" procedure using the void encode/decode
+ * routines, and "compound", which carries every NFSv4 operation.
+ */
+static struct rpc_procinfo	nfs4_procedures[] = {
+  PROC(null,		enc_void,	dec_void),
+  PROC(compound,	enc_compound,	dec_compound)
+};
+
+/*
+ * RPC version descriptor for version 4; nrprocs is derived from the size
+ * of nfs4_procedures[] so the two cannot drift apart.
+ */
+struct rpc_version		nfs_version4 = {
+	.number			= 4,
+	.nrprocs		= sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]),
+	.procs			= nfs4_procedures
+};
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 2ad13ec4cd27..a5a1c373444d 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -460,17 +460,62 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 
 static int
 nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
-			struct nfs_fsinfo *info)
+			struct nfs_fsstat *stat)
 {
+	struct nfs2_fsstat fsinfo;
 	int	status;
 
 	dprintk("NFS call  statfs\n");
-	memset((char *)info, 0, sizeof(*info));
-	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, info, 0);
+	stat->fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
 	dprintk("NFS reply statfs: %d\n", status);
+	if (status)
+		goto out;
+	stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize;
+	stat->fbytes = (u64)fsinfo.bfree  * fsinfo.bsize;
+	stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize;
+	stat->tfiles = 0;
+	stat->ffiles = 0;
+	stat->afiles = 0;
+out:
+	return status;
+}
+
+static int
+nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+			struct nfs_fsinfo *info)
+{
+	struct nfs2_fsstat fsinfo;	/* receives the STATFS reply; read below */
+	int	status;
+
+	dprintk("NFS call  fsinfo\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	dprintk("NFS reply fsinfo: %d\n", status);
+	if (status)			/* on failure the remaining *info fields are left untouched */
+		goto out;
+	info->rtmax  = NFS_MAXDATA;	/* limits below come from the STATFS reply and constants */
+	info->rtpref = fsinfo.tsize;
+	info->rtmult = fsinfo.bsize;
+	info->wtmax  = NFS_MAXDATA;
+	info->wtpref = fsinfo.tsize;
+	info->wtmult = fsinfo.bsize;
+	info->dtpref = fsinfo.tsize;
+	info->maxfilesize = 0x7FFFFFFF;
+	info->lease_time = 0;
+out:
 	return status;
 }
 
+static int
+nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		  struct nfs_pathconf *info)
+{
+	info->max_link = 0;
+	info->max_namelen = NFS2_MAXNAMLEN;
+	return 0;
+}
+
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
 static void
@@ -590,6 +635,8 @@ struct nfs_rpc_ops	nfs_v2_clientops = {
 	.readdir	= nfs_proc_readdir,
 	.mknod		= nfs_proc_mknod,
 	.statfs		= nfs_proc_statfs,
+	.fsinfo		= nfs_proc_fsinfo,
+	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs_decode_dirent,
 	.read_setup	= nfs_proc_read_setup,
 	.write_setup	= nfs_proc_write_setup,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 5976fa3e466f..e6ed1a443116 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -18,6 +18,7 @@
 #include <linux/blk.h>
 #include <linux/kmod.h>
 #include <linux/ctype.h>
+#include <../drivers/base/fs/fs.h>	/* Eeeeewwwww */
 
 #include "check.h"
 
@@ -111,115 +112,17 @@ char *disk_name(struct gendisk *hd, int part, char *buf)
 	return buf;
 }
 
-/* Driverfs file support */
-static ssize_t partition_device_kdev_read(struct device *driverfs_dev, 
-			char *page, size_t count, loff_t off)
-{
-	kdev_t kdev; 
-	kdev.value=(int)(long)driverfs_dev->driver_data;
-	return off ? 0 : sprintf (page, "%x\n",kdev.value);
-}
-static DEVICE_ATTR(kdev,S_IRUGO,partition_device_kdev_read,NULL);
-
-static ssize_t partition_device_type_read(struct device *driverfs_dev, 
-			char *page, size_t count, loff_t off) 
-{
-	return off ? 0 : sprintf (page, "BLK\n");
-}
-static DEVICE_ATTR(type,S_IRUGO,partition_device_type_read,NULL);
-
-static void driverfs_create_partitions(struct gendisk *hd)
-{
-	int max_p = 1<<hd->minor_shift;
-	struct hd_struct *p = hd->part;
-	char name[DEVICE_NAME_SIZE];
-	char bus_id[BUS_ID_SIZE];
-	struct device *dev, *parent;
-	int part;
-
-	/* if driverfs not supported by subsystem, skip partitions */
-	if (!(hd->flags & GENHD_FL_DRIVERFS))
-		return;
-
-	parent = hd->driverfs_dev;
-
-	if (parent)  {
-		sprintf(name, "%s", parent->name);
-		sprintf(bus_id, "%s:", parent->bus_id);
-	} else {
-		*name = *bus_id = '\0';
-	}
-
-	dev = &hd->disk_dev;
-	dev->driver_data = (void *)(long)__mkdev(hd->major, hd->first_minor);
-	sprintf(dev->name, "%sdisc", name);
-	sprintf(dev->bus_id, "%sdisc", bus_id);
-	for (part=1; part < max_p; part++) {
-		dev = &p[part-1].hd_driverfs_dev;
-		sprintf(dev->name, "%spart%d", name, part);
-		sprintf(dev->bus_id, "%s:p%d", bus_id, part);
-		if (!p[part-1].nr_sects)
-			continue;
-		dev->driver_data =
-				(void *)(long)__mkdev(hd->major, hd->first_minor+part);
-	}
-
-	dev = &hd->disk_dev;
-	dev->parent = parent;
-	if (parent)
-		dev->bus = parent->bus;
-	device_register(dev);
-	device_create_file(dev, &dev_attr_type);
-	device_create_file(dev, &dev_attr_kdev);
-
-	for (part=0; part < max_p-1; part++) {
-		dev = &p[part].hd_driverfs_dev;
-		dev->parent = parent;
-		if (parent)
-			dev->bus = parent->bus;
-		if (!dev->driver_data)
-			continue;
-		device_register(dev);
-		device_create_file(dev, &dev_attr_type);
-		device_create_file(dev, &dev_attr_kdev);
-	}
-}
-
-static void driverfs_remove_partitions(struct gendisk *hd)
-{
-	int max_p = 1<<hd->minor_shift;
-	struct device *dev;
-	struct hd_struct *p;
-	int part;
-
-	for (part=1, p = hd->part; part < max_p; part++, p++) {
-		dev = &p->hd_driverfs_dev;
-		if (dev->driver_data) {
-			device_remove_file(dev, &dev_attr_type);
-			device_remove_file(dev, &dev_attr_kdev);
-			put_device(dev);	
-			dev->driver_data = NULL;
-		}
-	}
-	dev = &hd->disk_dev;
-	if (dev->driver_data) {
-		device_remove_file(dev, &dev_attr_type);
-		device_remove_file(dev, &dev_attr_kdev);
-		put_device(dev);	
-		dev->driver_data = NULL;
-	}
-}
-
-static void check_partition(struct gendisk *hd, struct block_device *bdev)
+static struct parsed_partitions *
+check_partition(struct gendisk *hd, struct block_device *bdev)
 {
+	struct parsed_partitions *state;
 	devfs_handle_t de = NULL;
 	char buf[64];
-	struct parsed_partitions *state;
-	int i;
+	int i, res;
 
 	state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
 	if (!state)
-		return;
+		return NULL;
 
 	if (hd->flags & GENHD_FL_DEVFS)
 		de = hd->de;
@@ -233,32 +136,20 @@ static void check_partition(struct gendisk *hd, struct block_device *bdev)
 		if (isdigit(state->name[strlen(state->name)-1]))
 			sprintf(state->name, "p");
 	}
-	state->limit = 1<<hd->minor_shift;
-	for (i = 0; check_part[i]; i++) {
-		int res, j;
-		struct hd_struct *p;
+	state->limit = hd->minors;
+	i = res = 0;
+	while (!res && check_part[i]) {
 		memset(&state->parts, 0, sizeof(state->parts));
-		res = check_part[i](state, bdev);
-		if (!res)
-			continue;
-		if (res < 0) {
-			if (warn_no_part)
-				printk(" unable to read partition table\n");
-			return;
-		} 
-		p = hd->part;
-		for (j = 1; j < state->limit; j++) {
-			p[j-1].start_sect = state->parts[j].from;
-			p[j-1].nr_sects = state->parts[j].size;
-#if CONFIG_BLK_DEV_MD
-			if (!state->parts[j].flags)
-				continue;
-			md_autodetect_dev(bdev->bd_dev+j);
-#endif
-		}
-		return;
+		res = check_part[i++](state, bdev);
 	}
-	printk(" unknown partition table\n");
+	if (res > 0)
+		return state;
+	if (!res)
+		printk(" unknown partition table\n");
+	else if (warn_no_part)
+		printk(" unable to read partition table\n");
+	kfree(state);
+	return NULL;
 }
 
 static void devfs_register_partition(struct gendisk *dev, int part)
@@ -298,7 +189,7 @@ static void devfs_create_partitions(struct gendisk *dev)
 	unsigned int devfs_flags = DEVFS_FL_DEFAULT;
 	char dirname[64], symlink[16];
 	static devfs_handle_t devfs_handle;
-	int part, max_p = 1<<dev->minor_shift;
+	int part, max_p = dev->minors;
 	struct hd_struct *p = dev->part;
 
 	if (dev->flags & GENHD_FL_REMOVABLE)
@@ -329,9 +220,6 @@ static void devfs_create_partitions(struct gendisk *dev)
 	devfs_auto_unregister(dev->disk_de, slave);
 	if (!(dev->flags & GENHD_FL_DEVFS))
 		devfs_auto_unregister (slave, dir);
-	for (part = 1; part < max_p; part++, p++)
-		if (p->nr_sects)
-			devfs_register_partition(dev, part);
 #endif
 }
 
@@ -379,11 +267,6 @@ static void devfs_create_cdrom(struct gendisk *dev)
 static void devfs_remove_partitions(struct gendisk *dev)
 {
 #ifdef CONFIG_DEVFS_FS
-	int part;
-	for (part = (1<<dev->minor_shift)-1; part--; ) {
-		devfs_unregister(dev->part[part].de);
-		dev->part[part].de = NULL;
-	}
 	devfs_unregister(dev->disk_de);
 	dev->disk_de = NULL;
 	if (dev->flags & GENHD_FL_CD)
@@ -393,15 +276,196 @@ static void devfs_remove_partitions(struct gendisk *dev)
 #endif
 }
 
+static ssize_t part_dev_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->parent->driver_data;
+	struct hd_struct *p = dev->driver_data;
+	int part = p - disk->part + 1;
+	dev_t base = MKDEV(disk->major, disk->first_minor); 
+	return off ? 0 : sprintf(page, "%04x\n",base + part);
+}
+static ssize_t part_start_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct hd_struct *p = dev->driver_data;
+	return off ? 0 : sprintf(page, "%llu\n",(u64)p->start_sect);
+}
+static ssize_t part_size_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct hd_struct *p = dev->driver_data;
+	return off ? 0 : sprintf(page, "%llu\n",(u64)p->nr_sects);
+}
+static struct device_attribute part_attr_dev = {
+	.attr = {.name = "dev", .mode = S_IRUGO },
+	.show	= part_dev_read
+};
+static struct device_attribute part_attr_start = {
+	.attr = {.name = "start", .mode = S_IRUGO },
+	.show	= part_start_read
+};
+static struct device_attribute part_attr_size = {
+	.attr = {.name = "size", .mode = S_IRUGO },
+	.show	= part_size_read
+};
+
+void delete_partition(struct gendisk *disk, int part)
+{
+	struct hd_struct *p = disk->part + part - 1;	/* part N lives in slot N-1 of disk->part[] */
+	struct device *dev;
+	if (!p->nr_sects)		/* zero-length slot: nothing is torn down */
+		return;
+	p->start_sect = 0;
+	p->nr_sects = 0;
+	devfs_unregister(p->de);
+	dev = p->hd_driverfs_dev;
+	p->hd_driverfs_dev = NULL;	/* clear the slot's pointer before unregistering */
+	if (dev) {			/* may be unset: add_partition() returns early on kmalloc failure */
+		device_remove_file(dev, &part_attr_size);
+		device_remove_file(dev, &part_attr_start);
+		device_remove_file(dev, &part_attr_dev);
+		device_unregister(dev);	
+	}
+}
+
+static void part_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
+{
+	struct hd_struct *p = disk->part + part - 1;	/* part N lives in slot N-1 of disk->part[] */
+	struct device *parent = &disk->disk_dev;
+	struct device *dev;
+
+	p->start_sect = start;
+	p->nr_sects = len;
+	devfs_register_partition(disk, part);
+	dev = kmalloc(sizeof(struct device), GFP_KERNEL);
+	if (!dev)			/* start/size stay recorded; only the device node is skipped */
+		return;
+	memset(dev, 0, sizeof(struct device));
+	dev->parent = parent;
+	sprintf(dev->bus_id, "p%d", part);
+	dev->release = part_release;	/* part_release() kfree()s this allocation */
+	dev->driver_data = p;		/* read back by the part_*_read attribute handlers */
+	device_register(dev);
+	device_create_file(dev, &part_attr_dev);
+	device_create_file(dev, &part_attr_start);
+	device_create_file(dev, &part_attr_size);
+	p->hd_driverfs_dev = dev;	/* delete_partition() picks the device up from here */
+}
+
+static ssize_t disk_dev_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->driver_data;
+	dev_t base = MKDEV(disk->major, disk->first_minor); 
+	return off ? 0 : sprintf(page, "%04x\n",base);
+}
+static ssize_t disk_range_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->driver_data;
+	return off ? 0 : sprintf(page, "%d\n",disk->minors);
+}
+static ssize_t disk_size_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->driver_data;
+	return off ? 0 : sprintf(page, "%llu\n",(u64)get_capacity(disk));
+}
+static struct device_attribute disk_attr_dev = {
+	.attr = {.name = "dev", .mode = S_IRUGO },
+	.show	= disk_dev_read
+};
+static struct device_attribute disk_attr_range = {
+	.attr = {.name = "range", .mode = S_IRUGO },
+	.show	= disk_range_read
+};
+static struct device_attribute disk_attr_size = {
+	.attr = {.name = "size", .mode = S_IRUGO },
+	.show	= disk_size_read
+};
+
+static void disk_driverfs_symlinks(struct gendisk *disk)
+{
+	struct device *target = disk->driverfs_dev;
+	struct device *dev = &disk->disk_dev;
+	struct device *p;
+	char *path;
+	char *s;
+	int length;
+	int depth;
+
+	if (!target)
+		return;
+
+	get_device(target);
+
+	length = get_devpath_length(target);
+	length += strlen("..");
+
+	if (length > PATH_MAX)
+		return;
+
+	if (!(path = kmalloc(length,GFP_KERNEL)))
+		return;
+	memset(path,0,length);
+
+	/* our relative position */
+	strcpy(path,"..");
+
+	fill_devpath(target, path, length);
+	driverfs_create_symlink(&dev->dir, "device", path);
+	kfree(path);
+
+	for (p = target, depth = 0; p; p = p->parent, depth++)
+		;
+	length = get_devpath_length(dev);
+	length += 3 * depth - 1;
+
+	if (length > PATH_MAX)
+		return;
+
+	if (!(path = kmalloc(length,GFP_KERNEL)))
+		return;
+	memset(path,0,length);
+	for (s = path; depth--; s += 3)
+		strcpy(s, "../");
+
+	fill_devpath(dev, path, length);
+	driverfs_create_symlink(&target->dir, "block", path);
+	kfree(path);
+}
+
 /* Not exported, helper to add_disk(). */
 void register_disk(struct gendisk *disk)
 {
+	struct device *dev = &disk->disk_dev;
+	struct parsed_partitions *state;
 	struct block_device *bdev;
+	char *s;
+	int j;
+
+	strcpy(dev->bus_id, disk->disk_name);
+	/* ewww... some of these buggers have / in name... */
+	s = strchr(dev->bus_id, '/');
+	if (s)
+		*s = '!';
+	device_add(dev);
+	device_create_file(dev, &disk_attr_dev);
+	device_create_file(dev, &disk_attr_range);
+	device_create_file(dev, &disk_attr_size);
+	disk_driverfs_symlinks(disk);
+
 	if (disk->flags & GENHD_FL_CD)
 		devfs_create_cdrom(disk);
 
 	/* No minors to use for partitions */
-	if (!disk->minor_shift)
+	if (disk->minors == 1)
 		return;
 
 	/* No such device (e.g., media were just removed) */
@@ -411,45 +475,32 @@ void register_disk(struct gendisk *disk)
 	bdev = bdget(MKDEV(disk->major, disk->first_minor));
 	if (blkdev_get(bdev, FMODE_READ, 0, BDEV_RAW) < 0)
 		return;
-	check_partition(disk, bdev);
-	driverfs_create_partitions(disk);
+	state = check_partition(disk, bdev);
 	devfs_create_partitions(disk);
-	blkdev_put(bdev, BDEV_RAW);
-}
-
-void update_partition(struct gendisk *disk, int part)
-{
-	struct hd_struct *p = disk->part + part - 1;
-	struct device *dev = &p->hd_driverfs_dev;
-
-	if (!p->nr_sects) {
-		if (p->de) {
-			devfs_unregister(p->de);
-			p->de = NULL;
-		}
-		if (dev->driver_data) {
-			device_remove_file(dev, &dev_attr_type);
-			device_remove_file(dev, &dev_attr_kdev);
-			put_device(dev);	
-			dev->driver_data = NULL;
+	if (state) {
+		for (j = 1; j < state->limit; j++) {
+			sector_t size = state->parts[j].size;
+			sector_t from = state->parts[j].from;
+			if (!size)
+				continue;
+			add_partition(disk, j, from, size);
+#if CONFIG_BLK_DEV_MD
+			if (!state->parts[j].flags)
+				continue;
+			md_autodetect_dev(bdev->bd_dev+j);
+#endif
 		}
-		return;
+		kfree(state);
 	}
-	if (!p->de)
-		devfs_register_partition(disk, part);
-	if (dev->driver_data || !(disk->flags & GENHD_FL_DRIVERFS))
-		return;
-	dev->driver_data =
-		(void *)(long)__mkdev(disk->major, disk->first_minor+part);
-	device_register(dev);
-	device_create_file(dev, &dev_attr_type);
-	device_create_file(dev, &dev_attr_kdev);
+	blkdev_put(bdev, BDEV_RAW);
 }
 
 int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 {
 	kdev_t dev = to_kdev_t(bdev->bd_dev);
+	struct parsed_partitions *state;
 	int p, res;
+
 	if (!bdev->bd_invalidated)
 		return 0;
 	if (bdev->bd_part_count)
@@ -458,16 +509,25 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 	if (res)
 		return res;
 	bdev->bd_invalidated = 0;
-	for (p = 0; p < (1<<disk->minor_shift) - 1; p++) {
-		disk->part[p].start_sect = 0;
-		disk->part[p].nr_sects = 0;
-	}
+	for (p = 1; p < disk->minors; p++)
+		delete_partition(disk, p);
 	if (bdev->bd_op->revalidate)
 		bdev->bd_op->revalidate(dev);
-	if (get_capacity(disk))
-		check_partition(disk, bdev);
-	for (p = 1; p < (1<<disk->minor_shift); p++)
-		update_partition(disk, p);
+	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
+		return res;
+	for (p = 1; p < state->limit; p++) {	/* slot 0 of state->parts[] is not walked */
+		sector_t size = state->parts[p].size;
+		sector_t from = state->parts[p].from;
+		if (!size)			/* empty slot: nothing to register */
+			continue;
+		add_partition(disk, p, from, size);
+#if CONFIG_BLK_DEV_MD
+		if (!state->parts[p].flags)	/* only flagged partitions go to md autodetect */
+			continue;
+		md_autodetect_dev(bdev->bd_dev+p);
+#endif
+	}
+	kfree(state);
 	return res;
 }
 
@@ -493,48 +553,33 @@ fail:
 	return NULL;
 }
 
-static int wipe_partitions(struct gendisk *disk)
+void del_gendisk(struct gendisk *disk)
 {
-	int max_p = 1 << disk->minor_shift;
+	int max_p = disk->minors;
 	kdev_t devp;
-	int res;
 	int p;
 
 	/* invalidate stuff */
 	for (p = max_p - 1; p > 0; p--) {
 		devp = mk_kdev(disk->major,disk->first_minor + p);
-#if 0					/* %%% superfluous? */
-		if (disk->part[p-1].nr_sects == 0)
-			continue;
-#endif
-		res = invalidate_device(devp, 1);
-		if (res)
-			return res;
-		disk->part[p-1].start_sect = 0;
-		disk->part[p-1].nr_sects = 0;
+		invalidate_device(devp, 1);
+		delete_partition(disk, p);
 	}
 	devp = mk_kdev(disk->major,disk->first_minor);
-#if 0					/* %%% superfluous? */
-	if (disk->part[p].nr_sects == 0)
-		continue;
-#endif
-	res = invalidate_device(devp, 1);
-	if (res)
-		return res;
+	invalidate_device(devp, 1);
 	disk->capacity = 0;
-	return 0;
-}
-
-void del_gendisk(struct gendisk *disk)
-{
-	driverfs_remove_partitions(disk);
-	wipe_partitions(disk);
+	disk->flags &= ~GENHD_FL_UP;
 	unlink_gendisk(disk);
 	devfs_remove_partitions(disk);
-	if (disk->part) {
-		kfree(disk->part);
-		disk->part = NULL;
+	device_remove_file(&disk->disk_dev, &disk_attr_dev);
+	device_remove_file(&disk->disk_dev, &disk_attr_range);
+	device_remove_file(&disk->disk_dev, &disk_attr_size);
+	driverfs_remove_file(&disk->disk_dev.dir, "device");
+	if (disk->driverfs_dev) {
+		driverfs_remove_file(&disk->driverfs_dev->dir, "block");
+		put_device(disk->driverfs_dev);
 	}
+	device_del(&disk->disk_dev);
 }
 
 struct dev_name {
@@ -571,6 +616,7 @@ char *partition_name(dev_t dev)
 	dname->name = NULL;
 	if (hd)
 		dname->name = disk_name(hd, part, dname->namebuf);
+	put_disk(hd);
 	if (!dname->name) {
 		sprintf(dname->namebuf, "[dev %s]", kdevname(to_kdev_t(dev)));
 		dname->name = dname->namebuf;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 7bdea5bbe922..cbafa4129498 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -38,6 +38,7 @@
 #include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #include <linux/times.h>
+#include <linux/profile.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c
index e749c3c3bbed..8364f6c3eb41 100644
--- a/fs/xfs/linux/xfs_aops.c
+++ b/fs/xfs/linux/xfs_aops.c
@@ -36,7 +36,6 @@
 #include <linux/mpage.h>
 
 
-STATIC int delalloc_convert(struct inode *, struct page *, int, int);
 
 STATIC int
 map_blocks(
@@ -50,17 +49,11 @@ map_blocks(
 	int			error, nmaps = 1;
 
 retry:
-	if (flags & PBF_FILE_ALLOCATE) {
-		VOP_STRATEGY(vp, offset, count, flags, NULL,
-				pbmapp, &nmaps, error);
-	} else {
-		VOP_BMAP(vp, offset, count, flags, NULL,
-				pbmapp, &nmaps, error);
-	}
+	VOP_BMAP(vp, offset, count, flags, pbmapp, &nmaps, error);
 	if (flags & PBF_WRITE) {
 		if (unlikely((flags & PBF_DIRECT) && nmaps &&
 		    (pbmapp->pbm_flags & PBMF_DELAY))) {
-			flags = PBF_WRITE | PBF_FILE_ALLOCATE;
+			flags = PBF_FILE_ALLOCATE;
 			goto retry;
 		}
 		VMODIFY(vp);
@@ -130,83 +123,6 @@ map_buffer_at_offset(
 }
 
 /*
- * Convert delalloc space to real space, do not flush the
- * data out to disk, that will be done by the caller.
- */
-STATIC int
-release_page(
-	struct page		*page)
-{
-	struct inode		*inode = (struct inode*)page->mapping->host;
-	unsigned long		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
-	int			ret;
-
-	/* Are we off the end of the file ? */
-	if (page->index >= end_index) {
-		unsigned offset = inode->i_size & (PAGE_CACHE_SIZE-1);
-		if ((page->index >= end_index+1) || !offset) {
-			ret =  -EIO;
-			goto out;
-		}
-	}
-
-	ret = delalloc_convert(inode, page, 0, 0);
-
-out:
-	if (ret < 0) {
-		block_invalidatepage(page, 0);
-		ClearPageUptodate(page);
-
-		return 0;
-	}
-
-	return 1;
-}
-
-/*
- * Convert delalloc or unmapped space to real space and flush out
- * to disk.
- */
-STATIC int
-write_full_page(
-	struct page		*page,
-	int			delalloc)
-{
-	struct inode		*inode = (struct inode*)page->mapping->host;
-	unsigned long		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
-	int			ret;
-
-	/* Are we off the end of the file ? */
-	if (page->index >= end_index) {
-		unsigned offset = inode->i_size & (PAGE_CACHE_SIZE-1);
-		if ((page->index >= end_index+1) || !offset) {
-			ret =  -EIO;
-			goto out;
-		}
-	}
-
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-	}
-
-	ret = delalloc_convert(inode, page, 1, delalloc == 0);
-
-out:
-	if (ret < 0) {
-		/*
-		 * If it's delalloc and we have nowhere to put it,
-		 * throw it away.
-		 */
-		if (delalloc)
-			block_invalidatepage(page, 0);
-		ClearPageUptodate(page);
-		unlock_page(page);
-	}
-
-	return ret;
-}
-
-/*
  * Look for a page at index which is unlocked and not mapped
  * yet - clustering for mmap write case.
  */
@@ -347,16 +263,21 @@ submit_page(
 		end_page_writeback(page);
 }
 
-STATIC int
-map_page(
+/*
+ * Allocate & map buffers for page given the extent map. Write it out.
+ * except for the original page of a writepage, this is called on
+ * delalloc pages only, for the original page it is possible that
+ * the page has no mapping at all.
+ */
+STATIC void
+convert_page(
 	struct inode		*inode,
 	struct page		*page,
 	page_buf_bmap_t		*maps,
-	struct buffer_head	*bh_arr[],
 	int			startio,
 	int			all_bh)
 {
-	struct buffer_head	*bh, *head;
+	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
 	page_buf_bmap_t		*mp = maps, *tmp;
 	unsigned long		end, offset, end_index;
 	int			i = 0, index = 0;
@@ -393,32 +314,12 @@ map_page(
 		}
 	} while (i++, (bh = bh->b_this_page) != head);
 
-	return index;
-}
-
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc pages only, for the original page it is possible that
- * the page has no mapping at all.
- */
-STATIC void
-convert_page(
-	struct inode		*inode,
-	struct page		*page,
-	page_buf_bmap_t		*maps,
-	int			startio,
-	int			all_bh)
-{
-	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE];
-	int			cnt;
-
-	cnt = map_page(inode, page, maps, bh_arr, startio, all_bh);
 	if (startio) {
-		submit_page(page, bh_arr, cnt);
+		submit_page(page, bh_arr, index);
 	} else {
 		unlock_page(page);
 	}
+
 	page_cache_release(page);
 }
 
@@ -439,40 +340,47 @@ cluster_write(
 
 	tlast = (mp->pbm_offset + mp->pbm_bsize) >> PAGE_CACHE_SHIFT;
 	for (; tindex < tlast; tindex++) {
-		if (!(page = probe_page(inode, tindex)))
+		page = probe_page(inode, tindex);
+		if (!page)
 			break;
 		convert_page(inode, page, mp, startio, all_bh);
 	}
 }
 
 /*
- * Calling this without allocate_space set means we are being asked to
- * flush a dirty buffer head. When called with async_write set then we
- * are coming from writepage. A writepage call with allocate_space set
- * means we are being asked to write out all of the page which is before
- * EOF and therefore need to allocate space for unmapped portions of the
- * page.
+ * Calling this without startio set means we are being asked to make a dirty
+ * page ready for freeing its buffers.  When called with startio set then
+ * we are coming from writepage.
  */
 STATIC int
 delalloc_convert(
-	struct inode		*inode,		/* inode containing page */
-	struct page		*page,		/* page to convert - locked */
-	int			startio,	/* start io on the page */
+	struct page		*page,
+	int			startio,
 	int			allocate_space)
 {
-	struct buffer_head	*bh, *head;
-	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE];
+	struct inode		*inode = page->mapping->host;
+	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
 	page_buf_bmap_t		*mp, map;
-	int			i, cnt = 0;
-	int			len, err;
-	unsigned long		p_offset = 0;
-	loff_t			offset;
-	loff_t			end_offset;
+	unsigned long		p_offset = 0, end_index;
+	loff_t			offset, end_offset;
+	int			len, err, i, cnt = 0;
+
+	/* Are we off the end of the file ? */
+	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	if (page->index >= end_index) {
+		unsigned remaining = inode->i_size & (PAGE_CACHE_SIZE-1);
+		if ((page->index >= end_index+1) || !remaining) {
+			return -EIO;
+		}
+	}
 
 	offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
 	end_offset = offset + PAGE_CACHE_SIZE;
 	if (end_offset > inode->i_size)
 		end_offset = inode->i_size;
+	
+	if (startio && !page_has_buffers(page))
+		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
 
 	bh = head = page_buffers(page);
 	mp = NULL;
@@ -490,9 +398,10 @@ delalloc_convert(
 		if (buffer_delay(bh)) {
 			if (!mp) {
 				err = map_blocks(inode, offset, len, &map,
-						PBF_WRITE|PBF_FILE_ALLOCATE);
-				if (err)
+						PBF_FILE_ALLOCATE);
+				if (err) {
 					goto error;
+				}
 				mp = match_offset_to_mapping(page, &map,
 								p_offset);
 			}
@@ -517,8 +426,9 @@ delalloc_convert(
 								bh, head);
 				err = map_blocks(inode, offset, size, &map,
 						PBF_WRITE|PBF_DIRECT);
-				if (err)
+				if (err) {
 					goto error;
+				}
 				mp = match_offset_to_mapping(page, &map,
 								p_offset);
 			}
@@ -544,12 +454,14 @@ next_bh:
 		bh = bh->b_this_page;
 	} while (offset < end_offset);
 
-	if (startio)
+	if (startio) {
 		submit_page(page, bh_arr, cnt);
+	}
 
-	if (mp)
+	if (mp) {
 		cluster_write(inode, page->index + 1, mp,
 				startio, allocate_space);
+	}
 
 	return 0;
 
@@ -557,7 +469,15 @@ error:
 	for (i = 0; i < cnt; i++) {
 		unlock_buffer(bh_arr[i]);
 	}
-
+	
+	/*
+	 * If it's delalloc and we have nowhere to put it,
+	 * throw it away.
+	 */
+	if (!allocate_space) {
+		block_invalidatepage(page, 0);
+	}
+	ClearPageUptodate(page);
 	return err;
 }
 
@@ -591,7 +511,7 @@ linvfs_get_block_core(
 	}
 
 	VOP_BMAP(vp, offset, size,
-		create ? flags : PBF_READ, NULL,
+		create ? flags : PBF_READ,
 		(struct page_buf_bmap_s *)&pbmap, &retpbbm, error);
 	if (error)
 		return -error;
@@ -745,14 +665,12 @@ count_page_state(
 
 		bh = head = page_buffers(page);
 		do {
-			if (buffer_uptodate(bh) && !buffer_mapped(bh)) {
+			if (buffer_uptodate(bh) && !buffer_mapped(bh))
 				(*nr_unmapped)++;
-				continue;
-			}
-			if (!buffer_delay(bh))
-				continue;
-			(*nr_delalloc)++;
+			else if (buffer_delay(bh))
+				(*nr_delalloc)++;
 		} while ((bh = bh->b_this_page) != head);
+
 		return 1;
 	}
 
@@ -764,20 +682,22 @@ linvfs_writepage(
 	struct page		*page)
 {
 	int			error;
-	int			need_trans;
+	int			need_trans = 1;
 	int			nr_delalloc, nr_unmapped;
 
-	if (count_page_state(page, &nr_delalloc, &nr_unmapped)) {
+	if (count_page_state(page, &nr_delalloc, &nr_unmapped))
 		need_trans = nr_delalloc + nr_unmapped;
-	} else {
-		need_trans = 1;
-	}
 
 	if ((current->flags & (PF_FSTRANS)) && need_trans)
 		goto out_fail;
 
-	error = write_full_page(page, nr_delalloc);
-
+	/*
+	 * Convert delalloc or unmapped space to real space and flush out
+	 * to disk.
+	 */
+	error = delalloc_convert(page, 1, nr_delalloc == 0);
+	if (unlikely(error))
+		unlock_page(page);
 	return error;
 
 out_fail:
@@ -812,24 +732,26 @@ linvfs_release_page(
 	struct page		*page,
 	int			gfp_mask)
 {
-	int			need_trans;
 	int			nr_delalloc, nr_unmapped;
 
 	if (count_page_state(page, &nr_delalloc, &nr_unmapped)) {
-		need_trans = nr_delalloc;
-	} else {
-		need_trans = 0;
-	}
-
-	if (need_trans == 0) {
-		return try_to_free_buffers(page);
-	}
+		if (!nr_delalloc)
+			goto free_buffers;
+	} 
 
 	if (gfp_mask & __GFP_FS) {
-		if (release_page(page) == 0)
-			return try_to_free_buffers(page);
+		/*
+		 * Convert delalloc space to real space, do not flush the
+		 * data out to disk, that will be done by the caller.
+		 */
+		if (delalloc_convert(page, 0, 0) == 0)
+			goto free_buffers;
 	}
+
 	return 0;
+
+free_buffers:
+	return try_to_free_buffers(page);
 }
 
 
diff --git a/fs/xfs/linux/xfs_fs_subr.c b/fs/xfs/linux/xfs_fs_subr.c
index 8d50bd04d718..eea74fce0050 100644
--- a/fs/xfs/linux/xfs_fs_subr.c
+++ b/fs/xfs/linux/xfs_fs_subr.c
@@ -135,7 +135,6 @@ fs_flushinval_pages(
 	struct inode	*ip = LINVFS_GET_IP(vp);
 
 	if (VN_CACHED(vp)) {
-		filemap_fdatawait(ip->i_mapping);
 		filemap_fdatawrite(ip->i_mapping);
 		filemap_fdatawait(ip->i_mapping);
 
@@ -159,7 +158,6 @@ fs_flush_pages(
 	struct inode	*ip = LINVFS_GET_IP(vp);
 
 	if (VN_CACHED(vp)) {
-		filemap_fdatawait(ip->i_mapping);
 		filemap_fdatawrite(ip->i_mapping);
 		filemap_fdatawait(ip->i_mapping);
 	}
diff --git a/fs/xfs/linux/xfs_globals.c b/fs/xfs/linux/xfs_globals.c
index 54a4343289f1..7f0ac30a83ba 100644
--- a/fs/xfs/linux/xfs_globals.c
+++ b/fs/xfs/linux/xfs_globals.c
@@ -41,12 +41,6 @@ uint64_t	xfs_panic_mask;		/* set to cause more panics */
 unsigned long	xfs_physmem;
 
 /*
- * restricted_chown = 1	 bsd style chown(2), only super-user can give away files
- * restricted_chown = 0	 sysV style chown(2), non super-user can give away files
- */
-int		restricted_chown = 1;
-
-/*
  * Used to serialize atomicIncWithWrap.
  */
 spinlock_t Atomic_spin = SPIN_LOCK_UNLOCKED;
@@ -69,3 +63,6 @@ mutex_t		xfs_Gqm_lock;
 EXPORT_SYMBOL(xfs_Gqm);
 EXPORT_SYMBOL(xfs_next_bit);
 EXPORT_SYMBOL(xfs_contig_bits);
+EXPORT_SYMBOL(xfs_bmbt_get_all);
+EXPORT_SYMBOL(xfs_bmbt_disk_get_all);
+
diff --git a/fs/xfs/linux/xfs_globals.h b/fs/xfs/linux/xfs_globals.h
index 943e029f1d42..07c9856b1353 100644
--- a/fs/xfs/linux/xfs_globals.h
+++ b/fs/xfs/linux/xfs_globals.h
@@ -39,7 +39,6 @@
 
 extern uint64_t xfs_panic_mask;		/* set to cause more panics */
 
-extern int	restricted_chown;
 extern unsigned long	xfs_physmem;
 
 extern struct cred *sys_cred;
diff --git a/fs/xfs/linux/xfs_ioctl.c b/fs/xfs/linux/xfs_ioctl.c
index 5dbf4fd9debf..03451043e983 100644
--- a/fs/xfs/linux/xfs_ioctl.c
+++ b/fs/xfs/linux/xfs_ioctl.c
@@ -67,7 +67,7 @@ xfs_find_handle(
 	if (copy_from_user(&hreq, (xfs_fsop_handlereq_t *)arg, sizeof(hreq)))
 		return -XFS_ERROR(EFAULT);
 
-	bzero((char *)&handle, sizeof(handle));
+	memset((char *)&handle, 0, sizeof(handle));
 
 	switch (cmd) {
 	case XFS_IOC_PATH_TO_FSHANDLE:
@@ -228,7 +228,7 @@ xfs_vget_fsop_handlereq(
 	if (copy_from_user(handlep, hanp, hlen))
 		return XFS_ERROR(EFAULT);
 	if (hlen < sizeof(*handlep))
-		bzero(((char *)handlep) + hlen, sizeof(*handlep) - hlen);
+		memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
 	if (hlen > sizeof(handlep->ha_fsid)) {
 		if (handlep->ha_fid.xfs_fid_len !=
 				(hlen - sizeof(handlep->ha_fsid)
@@ -264,12 +264,6 @@ xfs_vget_fsop_handlereq(
 	vpp = XFS_ITOV(ip);
 	inodep = LINVFS_GET_IP(vpp);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	error = linvfs_revalidate_core(inodep, ATTR_COMM);
-	if (error) {
-		iput(inodep);
-		/* this error is (-) but our callers expect + */
-		return XFS_ERROR(-error);
-	}
 
 	*vp = vpp;
 	*inode = inodep;
diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c
index 3beca5b8fd34..d2ca5a30238a 100644
--- a/fs/xfs/linux/xfs_iops.c
+++ b/fs/xfs/linux/xfs_iops.c
@@ -91,14 +91,14 @@ linvfs_mknod(
 		mode &= ~current->fs->umask;
 #endif
 
-	bzero(&va, sizeof(va));
+	memset(&va, 0, sizeof(va));
 	va.va_mask = AT_TYPE|AT_MODE;
 	va.va_type = IFTOVT(mode);
 	va.va_mode = mode;
 
 	switch (mode & S_IFMT) {
 	case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
-		va.va_rdev = rdev;
+		va.va_rdev = XFS_MKDEV(MAJOR(rdev), MINOR(rdev));
 		va.va_mask |= AT_RDEV;
 		/*FALLTHROUGH*/
 	case S_IFREG:
@@ -122,8 +122,6 @@ linvfs_mknod(
 
 		if (S_ISCHR(mode) || S_ISBLK(mode))
 			ip->i_rdev = to_kdev_t(rdev);
-		/* linvfs_revalidate_core returns (-) errors */
-		error = -linvfs_revalidate_core(ip, ATTR_COMM);
 		validate_fields(dir);
 		d_instantiate(dentry, ip);
 		mark_inode_dirty_sync(ip);
@@ -186,7 +184,6 @@ linvfs_lookup(
 			VN_RELE(cvp);
 			return ERR_PTR(-EACCES);
 		}
-		error = -linvfs_revalidate_core(ip, ATTR_COMM);
 	}
 	if (error && (error != ENOENT))
 		return ERR_PTR(-error);
@@ -262,14 +259,13 @@ linvfs_symlink(
 
 	dvp = LINVFS_GET_VP(dir);
 
-	bzero(&va, sizeof(va));
+	memset(&va, 0, sizeof(va));
 	va.va_type = VLNK;
-	va.va_mode = 0777 & ~current->fs->umask;
-	va.va_mask = AT_TYPE|AT_MODE; /* AT_PROJID? */
+	va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO;
+	va.va_mask = AT_TYPE|AT_MODE;
 
 	error = 0;
-	VOP_SYMLINK(dvp, dentry, &va, (char *)symname,
-							&cvp, NULL, error);
+	VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
 	if (!error) {
 		ASSERT(cvp);
 		ASSERT(cvp->v_type == VLNK);
@@ -278,10 +274,9 @@ linvfs_symlink(
 			error = ENOMEM;
 			VN_RELE(cvp);
 		} else {
-			/* linvfs_revalidate_core returns (-) errors */
-			error = -linvfs_revalidate_core(ip, ATTR_COMM);
 			d_instantiate(dentry, ip);
 			validate_fields(dir);
+			validate_fields(ip); /* size needs update */
 			mark_inode_dirty_sync(ip);
 			mark_inode_dirty_sync(dir);
 		}
@@ -369,7 +364,7 @@ linvfs_readlink(
 }
 
 /*
- * careful here - this function can get called recusively, so
+ * careful here - this function can get called recursively, so
  * we need to be very careful about how much stack we use.
  * uio is kmalloced for this reason...
  */
@@ -441,16 +436,6 @@ linvfs_permission(
  * from the results of a getattr. This gets called out of things
  * like stat.
  */
-int
-linvfs_revalidate_core(
-	struct inode	*inode,
-	int		flags)
-{
-	vnode_t		*vp = LINVFS_GET_VP(inode);
-
-	/* vn_revalidate returns (-) error so this is ok */
-	return vn_revalidate(vp, flags);
-}
 
 STATIC int
 linvfs_getattr(
@@ -463,7 +448,7 @@ linvfs_getattr(
 	int		error = 0;
 
 	if (unlikely(vp->v_flag & VMODIFIED)) {
-		error = linvfs_revalidate_core(inode, 0);
+		error = vn_revalidate(vp);
 	}
 	if (!error)
 		generic_fillattr(inode, stat);
@@ -528,7 +513,7 @@ linvfs_setattr(
 	}
 
 	if (!error) {
-		vn_revalidate(vp, 0);
+		vn_revalidate(vp);
 		mark_inode_dirty_sync(inode);
 	}
 	return error;
@@ -618,30 +603,17 @@ linvfs_setxattr(
 		error = -ENOATTR;
 		p += xfs_namespaces[SYSTEM_NAMES].namelen;
 		if (strcmp(p, POSIXACL_ACCESS) == 0) {
-			if (vp->v_flag & VMODIFIED) {
-				error = linvfs_revalidate_core(inode, 0);
-				if (error)
-					return error;
-			}
 			error = xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
-			if (!error) {
-				VMODIFY(vp);
-				error = linvfs_revalidate_core(inode, 0);
-			}
 		}
 		else if (strcmp(p, POSIXACL_DEFAULT) == 0) {
-			error = linvfs_revalidate_core(inode, 0);
-			if (error)
-				return error;
 			error = xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
-			if (!error) {
-				VMODIFY(vp);
-				error = linvfs_revalidate_core(inode, 0);
-			}
 		}
 		else if (strcmp(p, POSIXCAP) == 0) {
 			error = xfs_cap_vset(vp, data, size);
 		}
+		if (!error) {
+			error = vn_revalidate(vp);
+		}
 		return error;
 	}
 
@@ -689,19 +661,9 @@ linvfs_getxattr(
 		error = -ENOATTR;
 		p += xfs_namespaces[SYSTEM_NAMES].namelen;
 		if (strcmp(p, POSIXACL_ACCESS) == 0) {
-			if (vp->v_flag & VMODIFIED) {
-				error = linvfs_revalidate_core(inode, 0);
-				if (error)
-					return error;
-			}
 			error = xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
 		}
 		else if (strcmp(p, POSIXACL_DEFAULT) == 0) {
-			if (vp->v_flag & VMODIFIED) {
-				error = linvfs_revalidate_core(inode, 0);
-				if (error)
-					return error;
-			}
 			error = xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
 		}
 		else if (strcmp(p, POSIXCAP) == 0) {
diff --git a/fs/xfs/linux/xfs_iops.h b/fs/xfs/linux/xfs_iops.h
index 3c4529374aec..c5ce4a6ea9f9 100644
--- a/fs/xfs/linux/xfs_iops.h
+++ b/fs/xfs/linux/xfs_iops.h
@@ -65,7 +65,6 @@ extern struct file_operations linvfs_dir_operations;
 
 extern struct address_space_operations linvfs_aops;
 
-extern int linvfs_revalidate_core(struct inode *, int);
 extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux/xfs_linux.h b/fs/xfs/linux/xfs_linux.h
index 7def3bb302b8..49bb2095c10c 100644
--- a/fs/xfs/linux/xfs_linux.h
+++ b/fs/xfs/linux/xfs_linux.h
@@ -67,6 +67,10 @@
 #define STATIC static
 #endif
 
+#define restricted_chown	xfs_params.restrict_chown
+#define irix_sgid_inherit	xfs_params.sgid_inherit
+#define irix_symlink_mode	xfs_params.symlink_mode
+
 typedef struct xfs_dirent {		/* data from readdir() */
 	xfs_ino_t	d_ino;		/* inode number of entry */
 	xfs_off_t	d_off;		/* offset of disk directory entry */
diff --git a/fs/xfs/linux/xfs_lrw.c b/fs/xfs/linux/xfs_lrw.c
index c7467e2acce3..9f4a37c46f32 100644
--- a/fs/xfs/linux/xfs_lrw.c
+++ b/fs/xfs/linux/xfs_lrw.c
@@ -43,16 +43,16 @@
 						<< mp->m_writeio_log)
 #define XFS_STRAT_WRITE_IMAPS	2
 
-STATIC int xfs_iomap_read(xfs_iocore_t *, loff_t, size_t, int, pb_bmap_t *,
-			int *, struct pm *);
-STATIC int xfs_iomap_write(xfs_iocore_t *, loff_t, size_t, pb_bmap_t *,
-			int *, int, struct pm *);
-STATIC int xfs_iomap_write_delay(xfs_iocore_t *, loff_t, size_t, pb_bmap_t *,
+STATIC int xfs_iomap_read(xfs_iocore_t *, loff_t, size_t, int, page_buf_bmap_t *,
+			int *);
+STATIC int xfs_iomap_write(xfs_iocore_t *, loff_t, size_t, page_buf_bmap_t *,
+			int *, int);
+STATIC int xfs_iomap_write_delay(xfs_iocore_t *, loff_t, size_t, page_buf_bmap_t *,
 			int *, int, int);
-STATIC int xfs_iomap_write_direct(xfs_iocore_t *, loff_t, size_t, pb_bmap_t *,
+STATIC int xfs_iomap_write_direct(xfs_iocore_t *, loff_t, size_t, page_buf_bmap_t *,
 			int *, int, int);
 STATIC int _xfs_imap_to_bmap(xfs_iocore_t *, xfs_off_t, xfs_bmbt_irec_t *,
-			pb_bmap_t *, int, int);
+			page_buf_bmap_t *, int, int);
 
 
 /*
@@ -136,11 +136,14 @@ xfs_read(
 	xfs_fsize_t		n;
 	xfs_inode_t		*ip;
 	xfs_mount_t		*mp;
+	vnode_t			*vp;
 	unsigned long		seg;
 	int			direct = filp->f_flags & O_DIRECT;
 
 	ip = XFS_BHVTOI(bdp);
+	vp = BHV_TO_VNODE(bdp);
 	mp = ip->i_mount;
+	vn_trace_entry(vp, "xfs_read", (inst_t *)__return_address);
 
 	XFS_STATS_INC(xfsstats.xs_read_calls);
 
@@ -194,7 +197,7 @@ xfs_read(
 
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
-	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
+	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
 	    !(filp->f_mode & FINVIS)) {
 		int error;
 		vrwlock_t locktype = VRWLOCK_READ;
@@ -230,8 +233,7 @@ xfs_zero_last_block(
 	xfs_iocore_t	*io,
 	xfs_off_t	offset,
 	xfs_fsize_t	isize,
-	xfs_fsize_t	end_size,
-	struct pm	*pmp)
+	xfs_fsize_t	end_size)
 {
 	xfs_fileoff_t	last_fsb;
 	xfs_mount_t	*mp;
@@ -310,8 +312,7 @@ xfs_zero_eof(
 	xfs_iocore_t	*io,
 	xfs_off_t	offset,		/* starting I/O offset */
 	xfs_fsize_t	isize,		/* current inode size */
-	xfs_fsize_t	end_size,	/* terminal inode size */
-	struct pm	*pmp)
+	xfs_fsize_t	end_size)	/* terminal inode size */
 {
 	struct inode	*ip = LINVFS_GET_IP(vp);
 	xfs_fileoff_t	start_zero_fsb;
@@ -337,7 +338,7 @@ xfs_zero_eof(
 	 * First handle zeroing the block on which isize resides.
 	 * We only zero a part of that block so it is handled specially.
 	 */
-	error = xfs_zero_last_block(ip, io, offset, isize, end_size, pmp);
+	error = xfs_zero_last_block(ip, io, offset, isize, end_size);
 	if (error) {
 		ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
 		ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -469,6 +470,7 @@ xfs_write(
 	XFS_STATS_INC(xfsstats.xs_write_calls);
 
 	vp = BHV_TO_VNODE(bdp);
+	vn_trace_entry(vp, "xfs_write", (inst_t *)__return_address);
 	xip = XFS_BHVTOI(bdp);
 
 	/* START copy & waste from filemap.c */
@@ -592,7 +594,7 @@ start:
 
 	if (!direct && (*offset > isize && isize)) {
 		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset,
-			isize, *offset + size, NULL);
+			isize, *offset + size);
 		if (error) {
 			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
 			return(-error);
@@ -755,112 +757,15 @@ retry:
 	return(ret);
 }
 
-/*
- * xfs_bmap() is the same as the irix xfs_bmap from xfs_rw.c
- * execpt for slight changes to the params
- */
-int
-xfs_bmap(bhv_desc_t	*bdp,
-	xfs_off_t	offset,
-	ssize_t		count,
-	int		flags,
-	struct cred	*cred,
-	pb_bmap_t	*pbmapp,
-	int		*npbmaps)
-{
-	xfs_inode_t	*ip;
-	int		error;
-	int		lockmode;
-	int		fsynced = 0;
-	vnode_t		*vp;
-
-	ip = XFS_BHVTOI(bdp);
-	ASSERT((ip->i_d.di_mode & IFMT) == IFREG);
-	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
-	       ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
-	ASSERT((flags & PBF_READ) || (flags & PBF_WRITE));
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_iocore.io_mount))
-		return XFS_ERROR(EIO);
-
-	if (flags & PBF_READ) {
-		lockmode = xfs_ilock_map_shared(ip);
-		error = xfs_iomap_read(&ip->i_iocore, offset, count,
-				 XFS_BMAPI_ENTIRE, pbmapp, npbmaps, NULL);
-		xfs_iunlock_map_shared(ip, lockmode);
-	} else { /* PBF_WRITE */
-		ASSERT(flags & PBF_WRITE);
-		vp = BHV_TO_VNODE(bdp);
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-		/*
-		 * Make sure that the dquots are there. This doesn't hold
-		 * the ilock across a disk read.
-		 */
-
-		if (XFS_IS_QUOTA_ON(ip->i_mount)) {
-			if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
-				if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_ILOCKED))) {
-					xfs_iunlock(ip, XFS_ILOCK_EXCL);
-					return XFS_ERROR(error);
-				}
-			}
-		}
-retry:
-		error = xfs_iomap_write(&ip->i_iocore, offset, count,
-					pbmapp, npbmaps, flags, NULL);
-		/* xfs_iomap_write unlocks/locks/unlocks */
-
-		if (error == ENOSPC) {
-			switch (fsynced) {
-			case 0:
-				if (ip->i_delayed_blks) {
-					filemap_fdatawrite(LINVFS_GET_IP(vp)->i_mapping);
-					fsynced = 1;
-				} else {
-					fsynced = 2;
-					flags |= PBF_SYNC;
-				}
-				error = 0;
-				xfs_ilock(ip, XFS_ILOCK_EXCL);
-				goto retry;
-			case 1:
-				fsynced = 2;
-				if (!(flags & PBF_SYNC)) {
-					flags |= PBF_SYNC;
-					error = 0;
-					xfs_ilock(ip, XFS_ILOCK_EXCL);
-					goto retry;
-				}
-			case 2:
-				sync_blockdev(vp->v_vfsp->vfs_super->s_bdev);
-				xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
-						XFS_LOG_FORCE|XFS_LOG_SYNC);
-
-				error = 0;
-/**
-				delay(HZ);
-**/
-				fsynced++;
-				xfs_ilock(ip, XFS_ILOCK_EXCL);
-				goto retry;
-			}
-		}
-	}
-
-	return XFS_ERROR(error);
-}
 
 int
-xfs_strategy(bhv_desc_t *bdp,
+xfs_strategy(xfs_inode_t *ip,
 	xfs_off_t	offset,
 	ssize_t		count,
 	int		flags,
-	struct cred	*cred,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		*npbmaps)
 {
-	xfs_inode_t	*ip;
 	xfs_iocore_t	*io;
 	xfs_mount_t	*mp;
 	int		error;
@@ -876,20 +781,16 @@ xfs_strategy(bhv_desc_t *bdp,
 	xfs_bmbt_irec_t imap[XFS_MAX_RW_NBMAPS];
 	xfs_trans_t	*tp;
 
-	ip = XFS_BHVTOI(bdp);
 	io = &ip->i_iocore;
 	mp = ip->i_mount;
 	/* is_xfs = IO_IS_XFS(io); */
 	ASSERT((ip->i_d.di_mode & IFMT) == IFREG);
 	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
 	       ((io->io_flags & XFS_IOCORE_RT) != 0));
-	ASSERT((flags & PBF_READ) || (flags & PBF_WRITE));
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	ASSERT(flags & PBF_WRITE);
-
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	nimaps = min(XFS_MAX_RW_NBMAPS, *npbmaps);
 	end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
@@ -1082,12 +983,110 @@ xfs_strategy(bhv_desc_t *bdp,
 }
 
 
+/*
+ * xfs_bmap() is the same as the irix xfs_bmap from xfs_rw.c
+ * except for slight changes to the params
+ */
+int
+xfs_bmap(bhv_desc_t	*bdp,
+	xfs_off_t	offset,
+	ssize_t		count,
+	int		flags,
+	page_buf_bmap_t	*pbmapp,
+	int		*npbmaps)
+{
+	xfs_inode_t	*ip;
+	int		error;
+	int		lockmode;
+	int		fsynced = 0;
+	vnode_t		*vp;
+
+	ip = XFS_BHVTOI(bdp);
+	ASSERT((ip->i_d.di_mode & IFMT) == IFREG);
+	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
+	       ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_iocore.io_mount))
+		return XFS_ERROR(EIO);
+
+	if (flags & PBF_READ) {
+		lockmode = xfs_ilock_map_shared(ip);
+		error = xfs_iomap_read(&ip->i_iocore, offset, count,
+				 XFS_BMAPI_ENTIRE, pbmapp, npbmaps);
+		xfs_iunlock_map_shared(ip, lockmode);
+	} else if (flags & PBF_FILE_ALLOCATE) {
+		error = xfs_strategy(ip, offset, count, flags,
+				pbmapp, npbmaps);
+	} else { /* PBF_WRITE */
+		ASSERT(flags & PBF_WRITE);
+		vp = BHV_TO_VNODE(bdp);
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+		/*
+		 * Make sure that the dquots are there. This doesn't hold
+		 * the ilock across a disk read.
+		 */
+
+		if (XFS_IS_QUOTA_ON(ip->i_mount)) {
+			if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
+				if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_ILOCKED))) {
+					xfs_iunlock(ip, XFS_ILOCK_EXCL);
+					return XFS_ERROR(error);
+				}
+			}
+		}
+retry:
+		error = xfs_iomap_write(&ip->i_iocore, offset, count,
+					pbmapp, npbmaps, flags);
+		/* xfs_iomap_write unlocks/locks/unlocks */
+
+		if (error == ENOSPC) {
+			switch (fsynced) {
+			case 0:
+				if (ip->i_delayed_blks) {
+					filemap_fdatawrite(LINVFS_GET_IP(vp)->i_mapping);
+					fsynced = 1;
+				} else {
+					fsynced = 2;
+					flags |= PBF_SYNC;
+				}
+				error = 0;
+				xfs_ilock(ip, XFS_ILOCK_EXCL);
+				goto retry;
+			case 1:
+				fsynced = 2;
+				if (!(flags & PBF_SYNC)) {
+					flags |= PBF_SYNC;
+					error = 0;
+					xfs_ilock(ip, XFS_ILOCK_EXCL);
+					goto retry;
+				}
+			case 2:
+				sync_blockdev(vp->v_vfsp->vfs_super->s_bdev);
+				xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
+						XFS_LOG_FORCE|XFS_LOG_SYNC);
+
+				error = 0;
+/**
+				delay(HZ);
+**/
+				fsynced++;
+				xfs_ilock(ip, XFS_ILOCK_EXCL);
+				goto retry;
+			}
+		}
+	}
+
+	return XFS_ERROR(error);
+}
+
+
 STATIC int
 _xfs_imap_to_bmap(
 	xfs_iocore_t	*io,
 	xfs_off_t	offset,
 	xfs_bmbt_irec_t *imap,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		imaps,			/* Number of imap entries */
 	int		pbmaps)			/* Number of pbmap entries */
 {
@@ -1138,9 +1137,8 @@ xfs_iomap_read(
 	loff_t		offset,
 	size_t		count,
 	int		flags,
-	pb_bmap_t	*pbmapp,
-	int		*npbmaps,
-	struct pm	*pmp)
+	page_buf_bmap_t	*pbmapp,
+	int		*npbmaps)
 {
 	xfs_fileoff_t	offset_fsb;
 	xfs_fileoff_t	end_fsb;
@@ -1191,10 +1189,9 @@ xfs_iomap_write(
 	xfs_iocore_t	*io,
 	loff_t		offset,
 	size_t		count,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		*npbmaps,
-	int		ioflag,
-	struct pm	*pmp)
+	int		ioflag)
 {
 	int		maps;
 	int		error = 0;
@@ -1211,7 +1208,7 @@ xfs_iomap_write(
 	 */
 
 	found = 0;
-	error = xfs_iomap_read(io, offset, count, flags, pbmapp, npbmaps, NULL);
+	error = xfs_iomap_read(io, offset, count, flags, pbmapp, npbmaps);
 	if (error)
 		goto out;
 
@@ -1260,7 +1257,7 @@ xfs_write_bmap(
 	xfs_mount_t	*mp,
 	xfs_iocore_t	*io,
 	xfs_bmbt_irec_t *imapp,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		iosize,
 	xfs_fileoff_t	ioalign,
 	xfs_fsize_t	isize)
@@ -1330,7 +1327,7 @@ xfs_iomap_write_delay(
 	xfs_iocore_t	*io,
 	loff_t		offset,
 	size_t		count,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		*npbmaps,
 	int		ioflag,
 	int		found)
@@ -1528,7 +1525,7 @@ xfs_iomap_write_direct(
 	xfs_iocore_t	*io,
 	loff_t		offset,
 	size_t		count,
-	pb_bmap_t	*pbmapp,
+	page_buf_bmap_t	*pbmapp,
 	int		*npbmaps,
 	int		ioflag,
 	int		found)
@@ -1830,36 +1827,22 @@ XFS_log_write_unmount_ro(bhv_desc_t	*bdp)
 }
 
 /*
- * In these two situations we disregard the readonly mount flag and
- * temporarily enable writes (we must, to ensure metadata integrity).
+ * If the underlying (data, log or realtime) device is readonly, there are
+ * some operations that cannot proceed.
  */
-STATIC int
-xfs_is_read_only(xfs_mount_t *mp)
+int
+xfs_dev_is_read_only(xfs_mount_t *mp, char *message)
 {
 	if (bdev_read_only(mp->m_ddev_targp->pbr_bdev) ||
-	    bdev_read_only(mp->m_logdev_targp->pbr_bdev)) {
+	    bdev_read_only(mp->m_logdev_targp->pbr_bdev) ||
+	   (mp->m_rtdev_targp && bdev_read_only(mp->m_rtdev_targp->pbr_bdev))) {
+		cmn_err(CE_NOTE,
+			"XFS: %s required on read-only device.", message);
 		cmn_err(CE_NOTE,
 			"XFS: write access unavailable, cannot proceed.");
 		return EROFS;
 	}
-	cmn_err(CE_NOTE,
-		"XFS: write access will be enabled during mount.");
-	XFS_MTOVFS(mp)->vfs_flag &= ~VFS_RDONLY;
-	return 0;
-}
 
-int
-xfs_recover_read_only(xlog_t *log)
-{
-	cmn_err(CE_NOTE, "XFS: WARNING: "
-		"recovery required on readonly filesystem.");
-	return xfs_is_read_only(log->l_mp);
+	return 0;
 }
 
-int
-xfs_quotacheck_read_only(xfs_mount_t *mp)
-{
-	cmn_err(CE_NOTE, "XFS: WARNING: "
-		"quotacheck required on readonly filesystem.");
-	return xfs_is_read_only(mp);
-}
diff --git a/fs/xfs/linux/xfs_lrw.h b/fs/xfs/linux/xfs_lrw.h
index 0ea2cfe9a860..3ac8eddedb23 100644
--- a/fs/xfs/linux/xfs_lrw.h
+++ b/fs/xfs/linux/xfs_lrw.h
@@ -39,13 +39,12 @@
  */
 #define XFS_MAX_RW_NBMAPS	4
 
-extern int xfs_bmap (bhv_desc_t *, xfs_off_t, ssize_t, int, struct cred *, pb_bmap_t *, int *);
-extern int xfs_strategy (bhv_desc_t *, xfs_off_t, ssize_t, int, struct cred *, pb_bmap_t *, int *);
+extern int xfs_bmap (bhv_desc_t *, xfs_off_t, ssize_t, int, page_buf_bmap_t *, int *);
 extern int xfsbdstrat (struct xfs_mount *, struct xfs_buf *);
 extern int xfs_bdstrat_cb (struct xfs_buf *);
 
 extern int xfs_zero_eof (vnode_t *, struct xfs_iocore *, xfs_off_t,
-				xfs_fsize_t, xfs_fsize_t, struct pm *);
+				xfs_fsize_t, xfs_fsize_t);
 extern ssize_t xfs_read (
 	struct bhv_desc		*bdp,
 	struct file		*filp,
@@ -62,8 +61,7 @@ extern ssize_t xfs_write (
 	loff_t			*offp,
 	struct cred		*credp);
 
-extern int xfs_recover_read_only (xlog_t *);
-extern int xfs_quotacheck_read_only (xfs_mount_t *);
+extern int xfs_dev_is_read_only(xfs_mount_t *, char *);
 
 extern void XFS_log_write_unmount_ro (bhv_desc_t *);
 
diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c
index 2dfaf44b0f7e..27be367ec316 100644
--- a/fs/xfs/linux/xfs_super.c
+++ b/fs/xfs/linux/xfs_super.c
@@ -92,8 +92,6 @@ STATIC struct export_operations linvfs_export_ops;
 #define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
 #define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
 #define MNTOPT_NORECOVERY "norecovery"	/* don't run XFS recovery */
-#define MNTOPT_OSYNCISDSYNC "osyncisdsync" /* o_sync == o_dsync on this fs */
-					   /* (this is now the default!) */
 #define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
 #define MNTOPT_QUOTA	"quota"		/* disk quotas */
 #define MNTOPT_MRQUOTA	"mrquota"	/* don't turnoff if SB has quotas on */
@@ -104,7 +102,6 @@ STATIC struct export_operations linvfs_export_ops;
 #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
 #define MNTOPT_QUOTANOENF  "qnoenforce" /* same as uqnoenforce */
 #define MNTOPT_NOUUID	"nouuid"	/* Ignore FS uuid */
-#define MNTOPT_IRIXSGID "irixsgid"	/* Irix-style sgid inheritance */
 #define MNTOPT_NOLOGFLUSH  "nologflush"	/* Don't use hard flushes in
 					   log writing */
 #define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
@@ -124,6 +121,9 @@ xfs_parseargs(
 
 	iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
 
+	/* Default to 32 bit inodes on linux all the time */
+	args->flags |= XFSMNT_32BITINODES;
+
 	/* Copy the already-parsed mount(2) flags we're interested in */
 	if (flags & MS_NOATIME)
 		args->flags |= XFSMNT_NOATIME;
@@ -175,9 +175,6 @@ xfs_parseargs(
 			args->iosizelog = (uint8_t) iosize;
 		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
 			args->flags |= XFSMNT_WSYNC;
-		} else if (!strcmp(this_char, MNTOPT_OSYNCISDSYNC)) {
-			/* no-op, this is now the default */
-printk("XFS: osyncisdsync is now the default, and will soon be deprecated.\n");
 		} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
 			args->flags |= XFSMNT_OSYNCISOSYNC;
 		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
@@ -212,10 +209,13 @@ printk("XFS: osyncisdsync is now the default, and will soon be deprecated.\n");
 			dswidth = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
 			args->flags |= XFSMNT_NOUUID;
-		} else if (!strcmp(this_char, MNTOPT_IRIXSGID)) {
-			args->flags |= XFSMNT_IRIXSGID;
 		} else if (!strcmp(this_char, MNTOPT_NOLOGFLUSH)) {
 			args->flags |= XFSMNT_NOLOGFLUSH;
+		} else if (!strcmp(this_char, "osyncisdsync")) {
+			/* no-op, this is now the default */
+printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
+		} else if (!strcmp(this_char, "irixsgid")) {
+printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n");
 		} else {
 			printk("XFS: unknown mount option [%s].\n", this_char);
 			return rval;
@@ -264,53 +264,72 @@ printk("XFS: osyncisdsync is now the default, and will soon be deprecated.\n");
 	return 0;
 }
 
-/*
- * Convert one device special file to a dev_t.
- * Helper routine, used only by spectodevs below.
- */
 STATIC int
-spectodev(
-	const char		*name,
-	const char		*id,
-	dev_t			*dev)
+xfs_showargs(
+	struct vfs		*vfsp,
+	struct seq_file		*m)
 {
-	struct nameidata	nd;
-	int			error;
+	static struct proc_xfs_info {
+		int	flag;
+		char	*str;
+	} xfs_info[] = {
+		/* the few simple ones we can get from the mount struct */
+		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
+		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
+		{ XFS_MOUNT_OSYNCISOSYNC,	"," MNTOPT_OSYNCISOSYNC },
+		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
+		{ 0, NULL }
+	};
+	struct proc_xfs_info	*xfs_infop;
+	struct xfs_mount	*mp = XFS_BHVTOM(vfsp->vfs_fbhv);
 
-	error = path_lookup(name, LOOKUP_FOLLOW, &nd);
-	if (error)
-		return error;
+	for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
+		if (mp->m_flags & xfs_infop->flag)
+			seq_puts(m, xfs_infop->str);
+	}
 
-	*dev = kdev_t_to_nr(nd.dentry->d_inode->i_rdev);
-	path_release(&nd);
-	return 0;
-}
+	if (mp->m_qflags & XFS_UQUOTA_ACCT) {
+		(mp->m_qflags & XFS_UQUOTA_ENFD) ?
+			seq_puts(m, "," MNTOPT_UQUOTA) :
+			seq_puts(m, "," MNTOPT_UQUOTANOENF);
+	}
 
-/*
- * Convert device special files to dev_t for data, log, realtime.
- */
-int
-spectodevs(
-	struct super_block	*sb,
-	struct xfs_mount_args	*args,
-	dev_t			*ddevp,
-	dev_t			*logdevp,
-	dev_t			*rtdevp)
-{
-	int			rval = 0;
+	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+		(mp->m_qflags & XFS_GQUOTA_ENFD) ?
+			seq_puts(m, "," MNTOPT_GQUOTA) :
+			seq_puts(m, "," MNTOPT_GQUOTANOENF);
+	}
 
-	*ddevp = sb->s_dev;
+	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+		seq_printf(m, "," MNTOPT_BIOSIZE "=%d", mp->m_writeio_log);
 
-	if (args->logname[0])
-		rval = spectodev(args->logname, "log", logdevp);
-	else
-		*logdevp = sb->s_dev;
+	if (mp->m_logbufs > 0)
+		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
 
-	if (args->rtname[0] && !rval)
-		rval = spectodev(args->rtname, "realtime", rtdevp);
-	else
-		*rtdevp = 0;
-	return rval;
+	if (mp->m_logbsize > 0)
+		seq_printf(m, "," MNTOPT_LOGBSIZE "=%d", mp->m_logbsize);
+
+	if (mp->m_ddev_targp->pbr_dev != mp->m_logdev_targp->pbr_dev)
+		seq_printf(m, "," MNTOPT_LOGDEV "=%s",
+				bdevname(mp->m_logdev_targp->pbr_bdev));
+
+	if (mp->m_rtdev_targp &&
+	    mp->m_ddev_targp->pbr_dev != mp->m_rtdev_targp->pbr_dev)
+		seq_printf(m, "," MNTOPT_RTDEV "=%s",
+				bdevname(mp->m_rtdev_targp->pbr_bdev));
+
+	if (mp->m_dalign > 0)
+		seq_printf(m, "," MNTOPT_SUNIT "=%d",
+				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
+
+	if (mp->m_swidth > 0)
+		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
+
+	if (vfsp->vfs_flag & VFS_DMI)
+		seq_puts(m, "," MNTOPT_DMAPI);
+
+	return 0;
 }
 
 
@@ -439,7 +458,6 @@ linvfs_fill_super(
 		goto fail_unmount;
 
 	ip = LINVFS_GET_IP(rootvp);
-	linvfs_revalidate_core(ip, ATTR_COMM);
 
 	sb->s_root = d_alloc_root(ip);
 	if (!sb->s_root)
@@ -493,12 +511,6 @@ linvfs_set_inode_ops(
 {
 	vnode_t			*vp = LINVFS_GET_VP(inode);
 
-	inode->i_mode = VTTOIF(vp->v_type);
-
-	/* If this isn't a new inode, nothing to do */
-	if (!(inode->i_state & I_NEW))
-		return;
-
 	if (vp->v_type == VNON) {
 		make_bad_inode(inode);
 	} else if (S_ISREG(inode->i_mode)) {
@@ -517,8 +529,6 @@ linvfs_set_inode_ops(
 		init_special_inode(inode, inode->i_mode,
 					kdev_t_to_nr(inode->i_rdev));
 	}
-
-	unlock_new_inode(inode);
 }
 
 /*
@@ -580,7 +590,6 @@ linvfs_put_super(
 	struct super_block	*sb)
 {
 	int			error;
-	int			sector_size;
 	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
 
 	VFS_DOUNMOUNT(vfsp, 0, NULL, NULL, error);
@@ -591,10 +600,6 @@ linvfs_put_super(
 	}
 
 	vfs_deallocate(vfsp);
-
-	/* Reset device block size */
-	sector_size = bdev_hardsect_size(sb->s_bdev);
-	set_blocksize(sb->s_bdev, sector_size);
 }
 
 void
@@ -728,7 +733,6 @@ linvfs_get_parent(
 			VN_RELE(cvp);
 			return ERR_PTR(-EACCES);
 		}
-		error = -linvfs_revalidate_core(ip, ATTR_COMM);
 	}
 	if (error)
 		return ERR_PTR(-error);
@@ -759,72 +763,9 @@ linvfs_show_options(
 	struct seq_file		*m,
 	struct vfsmount		*mnt)
 {
-	vfs_t			*vfsp;
-	xfs_mount_t		*mp;
-	static struct proc_xfs_info {
-		int	flag;
-		char	*str;
-	} xfs_info[] = {
-		/* the few simple ones we can get from the mount struct */
-		{ XFS_MOUNT_NOALIGN,		",noalign" },
-		{ XFS_MOUNT_NORECOVERY,		",norecovery" },
-		{ XFS_MOUNT_OSYNCISOSYNC,	",osyncisosync" },
-		{ XFS_MOUNT_NOUUID,		",nouuid" },
-		{ XFS_MOUNT_IRIXSGID,		",irixsgid" },
-		{ 0, NULL }
-	};
-	struct proc_xfs_info	*xfs_infop;
-
-	vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
-	mp = XFS_BHVTOM(vfsp->vfs_fbhv);
-
-	for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
-		if (mp->m_flags & xfs_infop->flag)
-			seq_puts(m, xfs_infop->str);
-	}
-
-	if (mp->m_qflags & XFS_UQUOTA_ACCT) {
-		seq_puts(m, ",uquota");
-		if (!(mp->m_qflags & XFS_UQUOTA_ENFD))
-			seq_puts(m, ",uqnoenforce");
-	}
+	vfs_t			*vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
 
-	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-		seq_puts(m, ",gquota");
-		if (!(mp->m_qflags & XFS_GQUOTA_ENFD))
-			seq_puts(m, ",gqnoenforce");
-	}
-
-	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-		seq_printf(m, ",biosize=%d", mp->m_writeio_log);
-
-	if (mp->m_logbufs > 0)
-		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
-
-	if (mp->m_logbsize > 0)
-		seq_printf(m, ",logbsize=%d", mp->m_logbsize);
-
-	if (mp->m_ddev_targp->pbr_dev != mp->m_logdev_targp->pbr_dev)
-		seq_printf(m, ",logdev=%s",
-				bdevname(mp->m_logdev_targp->pbr_bdev));
-
-	if (mp->m_rtdev_targp &&
-	    mp->m_ddev_targp->pbr_dev != mp->m_rtdev_targp->pbr_dev)
-		seq_printf(m, ",rtdev=%s",
-				bdevname(mp->m_rtdev_targp->pbr_bdev));
-
-	if (mp->m_dalign > 0)
-		seq_printf(m, ",sunit=%d",
-				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
-
-	if (mp->m_swidth > 0)
-		seq_printf(m, ",swidth=%d",
-				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
-
-	if (vfsp->vfs_flag & VFS_DMI)
-		seq_puts(m, ",dmapi");
-
-	return 0;
+	return xfs_showargs(vfsp, m);
 }
 
 STATIC struct super_operations linvfs_sops = {
diff --git a/fs/xfs/linux/xfs_super.h b/fs/xfs/linux/xfs_super.h
index e783163a2300..315910498a0c 100644
--- a/fs/xfs/linux/xfs_super.h
+++ b/fs/xfs/linux/xfs_super.h
@@ -80,18 +80,8 @@
 	((s)->s_fs_info = vfsp)
 
 
-struct xfs_mount_args;
-
 extern void
 linvfs_set_inode_ops(
 	struct inode	*inode);
 
-extern int
-spectodevs(
-	struct super_block *sb,
-	struct xfs_mount_args *args,
-	dev_t		*ddevp,
-	dev_t		*logdevp,
-	dev_t		*rtdevp);
-
 #endif	/* __XFS_SUPER_H__ */
diff --git a/fs/xfs/linux/xfs_sysctl.c b/fs/xfs/linux/xfs_sysctl.c
index 840810b33f27..4229b8975e05 100644
--- a/fs/xfs/linux/xfs_sysctl.c
+++ b/fs/xfs/linux/xfs_sysctl.c
@@ -35,30 +35,34 @@
 #include <linux/proc_fs.h>
 
 /*
- * Tunable xfs parameters
+ * Tunable XFS parameters
  */
 
 extern struct xfsstats xfsstats;
 
-unsigned long xfs_min[XFS_PARAM] = {			 0,			 0, 0 };
-unsigned long xfs_max[XFS_PARAM] = { XFS_REFCACHE_SIZE_MAX,  XFS_REFCACHE_SIZE_MAX, 1 };
+STATIC ulong xfs_min[XFS_PARAM] = { \
+			    0,			    0, 0, 0, 0, 0 };
+STATIC ulong xfs_max[XFS_PARAM] = { \
+	XFS_REFCACHE_SIZE_MAX,  XFS_REFCACHE_SIZE_MAX, 1, 1, 1, 1 };
 
-xfs_param_t xfs_params = { 128, 32, 0 };
+xfs_param_t xfs_params = { 128, 32, 0, 1, 0, 0 };
 
 static struct ctl_table_header *xfs_table_header;
 
-/* proc handlers */
 
-extern void xfs_refcache_resize(int xfs_refcache_new_size);
+/* Custom proc handlers */
 
-static int
-xfs_refcache_resize_proc_handler(ctl_table *ctl, int write, struct file * filp,
-		       void *buffer, size_t *lenp)
+STATIC int
+xfs_refcache_resize_proc_handler(
+	ctl_table	*ctl,
+	int		write,
+	struct file	*filp,
+	void		*buffer,
+	size_t		*lenp)
 {
-	int	ret;
-	int	*valp = ctl->data;
-	int	xfs_refcache_new_size;
-	int	xfs_refcache_old_size = *valp;
+	int		ret, *valp = ctl->data;
+	int		xfs_refcache_new_size;
+	int		xfs_refcache_old_size = *valp;
 
 	ret = proc_doulongvec_minmax(ctl, write, filp, buffer, lenp);
 	xfs_refcache_new_size = *valp;
@@ -73,12 +77,15 @@ xfs_refcache_resize_proc_handler(ctl_table *ctl, int write, struct file * filp,
 	return ret;
 }
 
-static int
-xfs_stats_clear_proc_handler(ctl_table *ctl, int write, struct file * filp,
-		       void *buffer, size_t *lenp)
+STATIC int
+xfs_stats_clear_proc_handler(
+	ctl_table	*ctl,
+	int		write,
+	struct file	*filp,
+	void		*buffer,
+	size_t		*lenp)
 {
-	int		ret;
-	int		*valp = ctl->data;
+	int		ret, *valp = ctl->data;
 	__uint32_t	vn_active;
 
 	ret = proc_doulongvec_minmax(ctl, write, filp, buffer, lenp);
@@ -95,7 +102,7 @@ xfs_stats_clear_proc_handler(ctl_table *ctl, int write, struct file * filp,
 	return ret;
 }
 
-static ctl_table xfs_table[] = {
+STATIC ctl_table xfs_table[] = {
 	{XFS_REFCACHE_SIZE, "refcache_size", &xfs_params.refcache_size,
 	sizeof(ulong), 0644, NULL, &xfs_refcache_resize_proc_handler,
 	&sysctl_intvec, NULL, &xfs_min[0], &xfs_max[0]},
@@ -108,15 +115,27 @@ static ctl_table xfs_table[] = {
 	sizeof(ulong), 0644, NULL, &xfs_stats_clear_proc_handler,
 	&sysctl_intvec, NULL, &xfs_min[2], &xfs_max[2]},
 
+	{XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown,
+	sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax,
+	&sysctl_intvec, NULL, &xfs_min[3], &xfs_max[3]},
+
+	{XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit,
+	sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax,
+	&sysctl_intvec, NULL, &xfs_min[4], &xfs_max[4]},
+
+	{XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode,
+	sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax,
+	&sysctl_intvec, NULL, &xfs_min[5], &xfs_max[5]},
+
 	{0}
 };
 
-static ctl_table xfs_dir_table[] = {
+STATIC ctl_table xfs_dir_table[] = {
 	{FS_XFS, "xfs", NULL, 0, 0555, xfs_table},
 	{0}
 };
 
-static ctl_table xfs_root_table[] = {
+STATIC ctl_table xfs_root_table[] = {
 	{CTL_FS, "fs",	NULL, 0, 0555, xfs_dir_table},
 	{0}
 };
diff --git a/fs/xfs/linux/xfs_sysctl.h b/fs/xfs/linux/xfs_sysctl.h
index 6649017ec372..4bf5749d1827 100644
--- a/fs/xfs/linux/xfs_sysctl.h
+++ b/fs/xfs/linux/xfs_sysctl.h
@@ -39,18 +39,25 @@
  * Tunable xfs parameters
  */
 
-#define XFS_PARAM	3
+#define XFS_PARAM	(sizeof(struct xfs_param) / sizeof(ulong))
 
 typedef struct xfs_param {
-	ulong	refcache_size;	/* Size of nfs refcache */
-	ulong	refcache_purge; /* # of entries to purge each time */
-	ulong	stats_clear;	/* reset all xfs stats to 0 */
+	ulong	refcache_size;	/* Size of NFS reference cache.          */
+	ulong	refcache_purge;	/* # of entries to purge each time.      */
+	ulong	stats_clear;	/* Reset all XFS statistics to zero.     */
+	ulong	restrict_chown;	/* Root/non-root can give away files.    */
+	ulong	sgid_inherit;	/* Inherit ISGID bit if process' GID is  */
+				/*  not a member of the parent dir GID.  */
+	ulong	symlink_mode;	/* Symlink creat mode affected by umask. */
 } xfs_param_t;
 
 enum {
 	XFS_REFCACHE_SIZE = 1,
 	XFS_REFCACHE_PURGE = 2,
 	XFS_STATS_CLEAR = 3,
+	XFS_RESTRICT_CHOWN = 4,
+	XFS_SGID_INHERIT = 5,
+	XFS_SYMLINK_MODE = 6,
 };
 
 extern xfs_param_t	xfs_params;
diff --git a/fs/xfs/linux/xfs_vfs.h b/fs/xfs/linux/xfs_vfs.h
index 0f384eb8220f..381cb9d7c6d4 100644
--- a/fs/xfs/linux/xfs_vfs.h
+++ b/fs/xfs/linux/xfs_vfs.h
@@ -92,6 +92,8 @@ typedef struct vfsops {
 					/* send dmapi mount event */
 	int	(*vfs_dmapi_fsys_vector)(bhv_desc_t *,
 					 struct dm_fcntl_vector *);
+	void	(*vfs_init_vnode)(bhv_desc_t *, struct vnode *,
+					bhv_desc_t *, int);
 	void	(*vfs_force_shutdown)(bhv_desc_t *,
 					int, char *, int);
 } vfsops_t;
@@ -132,6 +134,14 @@ typedef struct vfsops {
 	rv = (*(VFS_FOPS(vfsp)->vfs_vget))((vfsp)->vfs_fbhv, vpp, fidp);  \
 	BHV_READ_UNLOCK(&(vfsp)->vfs_bh); \
 }
+
+#define VFS_INIT_VNODE(vfsp, vp, bhv, unlock) \
+{	\
+	BHV_READ_LOCK(&(vfsp)->vfs_bh); \
+	(*(VFS_FOPS(vfsp)->vfs_init_vnode))((vfsp)->vfs_fbhv, vp, bhv, unlock);\
+	BHV_READ_UNLOCK(&(vfsp)->vfs_bh); \
+}
+
 /* No behavior lock here */
 #define VFS_FORCE_SHUTDOWN(vfsp, flags) \
 	(*(VFS_FOPS(vfsp)->vfs_force_shutdown))((vfsp)->vfs_fbhv, flags, __FILE__, __LINE__);
diff --git a/fs/xfs/linux/xfs_vnode.c b/fs/xfs/linux/xfs_vnode.c
index 0d4cb5ea14eb..51a855c9a4a0 100644
--- a/fs/xfs/linux/xfs_vnode.c
+++ b/fs/xfs/linux/xfs_vnode.c
@@ -179,20 +179,10 @@ vn_get(struct vnode *vp, vmap_t *vmap)
 	if (inode->i_state & I_FREEING)
 		return NULL;
 
-	inode = iget_locked(vmap->v_vfsp->vfs_super, vmap->v_ino);
+	inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
 	if (inode == NULL)		/* Inode not present */
 		return NULL;
 
-	/* We do not want to create new inodes via vn_get,
-	 * returning NULL here is OK.
-	 */
-	if (inode->i_state & I_NEW) {
-		make_bad_inode(inode);
-		unlock_new_inode(inode);
-		iput(inode);
-		return NULL;
-	}
-
 	vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
 	ASSERT((vp->v_flag & VPURGE) == 0);
 
@@ -203,7 +193,7 @@ vn_get(struct vnode *vp, vmap_t *vmap)
  * "revalidate" the linux inode.
  */
 int
-vn_revalidate(struct vnode *vp, int flags)
+vn_revalidate(struct vnode *vp)
 {
 	int		error;
 	struct inode	*inode;
@@ -215,7 +205,7 @@ vn_revalidate(struct vnode *vp, int flags)
 
 	ASSERT(vp->v_bh.bh_first != NULL);
 
-	VOP_GETATTR(vp, &va, flags & ATTR_LAZY, NULL, error);
+	VOP_GETATTR(vp, &va, 0, NULL, error);
 
 	if (! error) {
 		inode = LINVFS_GET_IP(vp);
@@ -225,27 +215,12 @@ vn_revalidate(struct vnode *vp, int flags)
 		inode->i_nlink	    = va.va_nlink;
 		inode->i_uid	    = va.va_uid;
 		inode->i_gid	    = va.va_gid;
-		inode->i_rdev	    = mk_kdev(MAJOR(va.va_rdev),
-						MINOR(va.va_rdev));
-		inode->i_blksize    = PAGE_CACHE_SIZE;
-		inode->i_generation = va.va_gencount;
-		if ((flags & ATTR_COMM) ||
-		    S_ISREG(inode->i_mode) ||
-		    S_ISDIR(inode->i_mode) ||
-		    S_ISLNK(inode->i_mode)) {
-			inode->i_size	    = va.va_size;
-			inode->i_blocks	    = va.va_nblocks;
-			inode->i_atime	    = va.va_atime.tv_sec;
-			inode->i_mtime	    = va.va_mtime.tv_sec;
-			inode->i_ctime	    = va.va_ctime.tv_sec;
-		}
-		if (flags & ATTR_LAZY)
-			vp->v_flag &= ~VMODIFIED;
-		else
-			VUNMODIFY(vp);
-	} else {
-		vn_trace_exit(vp, "vn_revalidate.error",
-					(inst_t *)__return_address);
+		inode->i_size	    = va.va_size;
+		inode->i_blocks	    = va.va_nblocks;
+		inode->i_mtime	    = va.va_mtime.tv_sec;
+		inode->i_ctime	    = va.va_ctime.tv_sec;
+		inode->i_atime	    = va.va_atime.tv_sec;
+		VUNMODIFY(vp);
 	}
 
 	return -error;
@@ -412,8 +387,7 @@ vn_remove(struct vnode *vp)
 	 * After the following purge the vnode
 	 * will no longer exist.
 	 */
-	VMAP(vp, XFS_BHVTOI(vp->v_fbhv), vmap);
-
+	VMAP(vp, vmap);
 	vn_purge(vp, &vmap);
 }
 
diff --git a/fs/xfs/linux/xfs_vnode.h b/fs/xfs/linux/xfs_vnode.h
index bf6025bfe0a4..dc76f19ca268 100644
--- a/fs/xfs/linux/xfs_vnode.h
+++ b/fs/xfs/linux/xfs_vnode.h
@@ -211,8 +211,7 @@ typedef int	(*vop_fid2_t)(bhv_desc_t *, struct fid *);
 typedef int	(*vop_release_t)(bhv_desc_t *);
 typedef int	(*vop_rwlock_t)(bhv_desc_t *, vrwlock_t);
 typedef void	(*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t);
-typedef int	(*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, struct cred *, struct page_buf_bmap_s *, int *);
-typedef int	(*vop_strategy_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, struct cred *, struct page_buf_bmap_s *, int *);
+typedef int	(*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, struct page_buf_bmap_s *, int *);
 typedef int	(*vop_reclaim_t)(bhv_desc_t *);
 typedef int	(*vop_attr_get_t)(bhv_desc_t *, char *, char *, int *, int,
 				struct cred *);
@@ -254,7 +253,6 @@ typedef struct vnodeops {
 	vop_rwlock_t		vop_rwlock;
 	vop_rwunlock_t		vop_rwunlock;
 	vop_bmap_t		vop_bmap;
-	vop_strategy_t		vop_strategy;
 	vop_reclaim_t		vop_reclaim;
 	vop_attr_get_t		vop_attr_get;
 	vop_attr_set_t		vop_attr_set;
@@ -286,16 +284,10 @@ typedef struct vnodeops {
 	rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,cr);\
 	VN_BHV_READ_UNLOCK(&(vp)->v_bh);				\
 }
-#define VOP_BMAP(vp,of,sz,rw,cr,b,n,rv)					\
+#define VOP_BMAP(vp,of,sz,rw,b,n,rv)					\
 {									\
 	VN_BHV_READ_LOCK(&(vp)->v_bh);					\
-	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,cr,b,n);		\
-	VN_BHV_READ_UNLOCK(&(vp)->v_bh);				\
-}
-#define VOP_STRATEGY(vp,of,sz,rw,cr,b,n,rv)				\
-{									\
-	VN_BHV_READ_LOCK(&(vp)->v_bh);					\
-	rv = _VOP_(vop_strategy, vp)((vp)->v_fbhv,of,sz,rw,cr,b,n);	\
+	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n);		\
 	VN_BHV_READ_UNLOCK(&(vp)->v_bh);				\
 }
 #define VOP_OPEN(vp, cr, rv)						\
@@ -528,14 +520,14 @@ typedef struct vattr {
 	mode_t		va_mode;	/* file access mode */
 	uid_t		va_uid;		/* owner user id */
 	gid_t		va_gid;		/* owner group id */
-	dev_t		va_fsid;	/* file system id (dev for now) */
+	xfs_dev_t	va_fsid;	/* file system id (dev for now) */
 	xfs_ino_t	va_nodeid;	/* node id */
 	nlink_t		va_nlink;	/* number of references to file */
 	xfs_off_t	va_size;	/* file size in bytes */
 	timespec_t	va_atime;	/* time of last access */
 	timespec_t	va_mtime;	/* time of last modification */
 	timespec_t	va_ctime;	/* time file ``created'' */
-	dev_t		va_rdev;	/* device the file represents */
+	xfs_dev_t	va_rdev;	/* device the file represents */
 	u_long		va_blksize;	/* fundamental block size */
 	__int64_t	va_nblocks;	/* # of blocks allocated */
 	u_long		va_vcode;	/* version code */
@@ -637,12 +629,13 @@ typedef struct vnode_map {
 	xfs_ino_t	v_ino;			/* inode #	*/
 } vmap_t;
 
-#define VMAP(vp, ip, vmap)	{(vmap).v_vfsp	 = (vp)->v_vfsp,	\
-				 (vmap).v_number = (vp)->v_number,	\
-				 (vmap).v_ino	 = (ip)->i_ino; }
+#define VMAP(vp, vmap)	{(vmap).v_vfsp	 = (vp)->v_vfsp,	\
+			 (vmap).v_number = (vp)->v_number,	\
+			 (vmap).v_ino	 = (vp)->v_inode.i_ino; }
+
 extern void	vn_purge(struct vnode *, vmap_t *);
 extern vnode_t	*vn_get(struct vnode *, vmap_t *);
-extern int	vn_revalidate(struct vnode *, int);
+extern int	vn_revalidate(struct vnode *);
 extern void	vn_remove(struct vnode *);
 
 static inline int vn_count(struct vnode *vp)
diff --git a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c
index c98dc4637050..b066bc7878df 100644
--- a/fs/xfs/pagebuf/page_buf.c
+++ b/fs/xfs/pagebuf/page_buf.c
@@ -305,8 +305,7 @@ _pagebuf_initialize(
 	/*
 	 * We don't want certain flags to appear in pb->pb_flags.
 	 */
-	flags &= ~(PBF_LOCK|PBF_ENTER_PAGES|PBF_MAPPED);
-	flags &= ~(PBF_DONT_BLOCK|PBF_READ_AHEAD);
+	flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);
 
 	pb_tracking_get(pb);
 
@@ -545,9 +544,9 @@ _pagebuf_lookup_pages(
 		size -= nbytes;
 
 		if (!PageUptodate(page)) {
-			if ((blocksize == PAGE_CACHE_SIZE) &&
-			    (flags & PBF_READ)) {
-				pb->pb_locked = 1;
+			if (blocksize == PAGE_CACHE_SIZE) {
+				if (flags & PBF_READ)
+					pb->pb_locked = 1;
 				good_pages--;
 			} else if (!PagePrivate(page)) {
 				unsigned long i, range = (offset + nbytes) >> SECTOR_SHIFT;
@@ -717,7 +716,6 @@ found:
 				PBF_MAPPED | \
 				_PBF_LOCKABLE | \
 				_PBF_ALL_PAGES_MAPPED | \
-				_PBF_SOME_INVALID_PAGES | \
 				_PBF_ADDR_ALLOCATED | \
 				_PBF_MEM_ALLOCATED;
 	PB_TRACE(pb, PB_TRACE_REC(got_lk), 0);
@@ -832,19 +830,11 @@ pagebuf_lookup(
 	int			flags)
 {
 	page_buf_t		*pb = NULL;
-	int			status;
 
 	flags |= _PBF_PRIVATE_BH;
 	pb = pagebuf_allocate(flags);
 	if (pb) {
 		_pagebuf_initialize(pb, target, ioff, isize, flags);
-		if (flags & PBF_ENTER_PAGES) {
-			status = _pagebuf_lookup_pages(pb, &inode->i_data, 0);
-			if (status != 0) {
-				pagebuf_free(pb);
-				return (NULL);
-			}
-		}
 	}
 	return pb;
 }
@@ -985,6 +975,7 @@ pagebuf_get_no_daddr(
 	}
 	/* otherwise pagebuf_free just ignores it */
 	pb->pb_flags |= _PBF_MEM_ALLOCATED;
+	PB_CLEAR_OWNER(pb);
 	up(&PBP(pb)->pb_sema);	/* Return unlocked pagebuf */
 
 	PB_TRACE(pb, PB_TRACE_REC(no_daddr), rmem);
@@ -1926,14 +1917,14 @@ STATIC ctl_table pagebuf_table[] = {
 	sizeof(ulong), 0644, NULL, &proc_doulongvec_ms_jiffies_minmax,
 	&sysctl_intvec, NULL, &pagebuf_min[1], &pagebuf_max[1]},
 
-	{PB_STATS_CLEAR, "stats_clear", &pb_params.data[3],
+	{PB_STATS_CLEAR, "stats_clear", &pb_params.data[2],
 	sizeof(ulong), 0644, NULL, &pb_stats_clear_handler,
-	&sysctl_intvec, NULL, &pagebuf_min[3], &pagebuf_max[3]},
+	&sysctl_intvec, NULL, &pagebuf_min[2], &pagebuf_max[2]},
 
 #ifdef PAGEBUF_TRACE
-	{PB_DEBUG, "debug", &pb_params.data[4],
+	{PB_DEBUG, "debug", &pb_params.data[3],
 	sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax,
-	&sysctl_intvec, NULL, &pagebuf_min[4], &pagebuf_max[4]},
+	&sysctl_intvec, NULL, &pagebuf_min[3], &pagebuf_max[3]},
 #endif
 	{0}
 };
diff --git a/fs/xfs/pagebuf/page_buf.h b/fs/xfs/pagebuf/page_buf.h
index 18e27035cf72..ff240fefd32c 100644
--- a/fs/xfs/pagebuf/page_buf.h
+++ b/fs/xfs/pagebuf/page_buf.h
@@ -100,35 +100,27 @@ typedef enum page_buf_flags_e {		/* pb_flags values */
 	PBF_MAPPABLE = (1 << 9),/* use directly-addressable pages	   */
 	PBF_STALE = (1 << 10),	/* buffer has been staled, do not find it  */
 	PBF_FS_MANAGED = (1 << 11), /* filesystem controls freeing memory  */
-	PBF_RELEASE = (1 << 12),/* buffer to be released after I/O is done */
 
 	/* flags used only as arguments to access routines */
 	PBF_LOCK = (1 << 13),	/* lock requested			   */
 	PBF_TRYLOCK = (1 << 14), /* lock requested, but do not wait	   */
-	PBF_ALLOCATE = (1 << 15), /* allocate all pages		  (UNUSED) */
-	PBF_FILE_ALLOCATE = (1 << 16), /* allocate all file space	   */
-	PBF_DONT_BLOCK = (1 << 17), /* do not block in current thread	   */
-	PBF_DIRECT = (1 << 18),	  /* direct I/O desired			   */
-	PBF_ENTER_PAGES = (1 << 21), /* create invalid pages for all	   */
-				/* pages in the range of the buffer	   */
-				/* not already associated with buffer	   */
+	PBF_FILE_ALLOCATE = (1 << 15), /* allocate all file space	   */
+	PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread	   */
+	PBF_DIRECT = (1 << 17),	  /* direct I/O desired			   */
 
 	/* flags used only internally */
 	_PBF_LOCKABLE = (1 << 19), /* page_buf_t may be locked		   */
 	_PBF_PRIVATE_BH = (1 << 20), /* do not use public buffer heads	   */
-	_PBF_ALL_PAGES_MAPPED = (1 << 22),
+	_PBF_ALL_PAGES_MAPPED = (1 << 21),
 				/* all pages in rage are mapped		   */
-	_PBF_SOME_INVALID_PAGES = (1 << 23),
-				/* some mapped pages are not valid	   */
-	_PBF_ADDR_ALLOCATED = (1 << 24),
+	_PBF_ADDR_ALLOCATED = (1 << 22),
 				/* pb_addr space was allocated		   */
-	_PBF_MEM_ALLOCATED = (1 << 25),
+	_PBF_MEM_ALLOCATED = (1 << 23),
 				/* pb_mem and underlying pages allocated   */
 
-	PBF_FORCEIO = (1 << 27),
-	PBF_FLUSH = (1 << 28),	/* flush disk write cache */
-	PBF_READ_AHEAD = (1 << 29),
-	PBF_FS_RESERVED_3 = (1 << 31)	/* reserved (XFS use: XFS_B_STALE) */
+	PBF_FORCEIO = (1 << 24),
+	PBF_FLUSH = (1 << 25),	/* flush disk write cache */
+	PBF_READ_AHEAD = (1 << 26),
 
 } page_buf_flags_t;
 
@@ -145,7 +137,6 @@ typedef struct pb_target {
 	struct block_device	*pbr_bdev;
 	struct address_space	*pbr_mapping;
 	unsigned int		pbr_blocksize;
-	unsigned int		pbr_blocksize_bits;
 } pb_target_t;
 
 /*
@@ -303,26 +294,16 @@ extern int pagebuf_lock_value(		/* return count on lock		*/
 extern int pagebuf_lock(		/* lock buffer			*/
 		page_buf_t *);		/* buffer to lock		*/
 
-extern void pagebuf_lock_disable(	/* disable buffer locking	*/
-		struct pb_target *,	/* inode for buffers		*/
-		int);			/* do blkdev_put?		*/
-
-extern struct pb_target *pagebuf_lock_enable(
-		dev_t,
-		int);			/* do blkdev_get?		*/
-
-extern void pagebuf_target_blocksize(
-		pb_target_t *,
-		unsigned int);		/* block size			*/
-
 extern void pagebuf_target_clear(struct pb_target *);
 
 extern void pagebuf_unlock(		/* unlock buffer		*/
 		page_buf_t *);		/* buffer to unlock		*/
 
 /* Buffer Utility Routines */
-
-#define pagebuf_geterror(pb)	((pb)->pb_error)
+static inline int pagebuf_geterror(page_buf_t *pb)
+{
+	return (pb ? pb->pb_error : ENOMEM);
+}
 
 extern void pagebuf_iodone(		/* mark buffer I/O complete	*/
 		page_buf_t *);		/* buffer to mark		*/
diff --git a/fs/xfs/pagebuf/page_buf_locking.c b/fs/xfs/pagebuf/page_buf_locking.c
index 6be04596ec11..ecabe0f3c2c2 100644
--- a/fs/xfs/pagebuf/page_buf_locking.c
+++ b/fs/xfs/pagebuf/page_buf_locking.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- * Portions Copyright (c) 2002 Christoph Hellwig.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -58,10 +57,6 @@
 
 #include "page_buf_internal.h"
 
-#ifndef EVMS_MAJOR
-#define EVMS_MAJOR      117
-#endif
-
 /*
  *	pagebuf_cond_lock
  *
@@ -126,82 +121,6 @@ pagebuf_lock(
 	return 0;
 }
 
-/*
- *	pagebuf_lock_disable
- *
- *	pagebuf_lock_disable disables buffer object locking for an inode.
- *	remove_super() does a blkdev_put for us on the data device, hence
- * 	the do_blkdev_put argument.
- */
-void
-pagebuf_lock_disable(
-	pb_target_t		*target,
-	int			do_blkdev_put)
-{
-	pagebuf_delwri_flush(target, PBDF_WAIT, NULL);
-	if (do_blkdev_put)
-		blkdev_put(target->pbr_bdev, BDEV_FS);
-	kfree(target);
-}
-
-/*
- *	pagebuf_lock_enable
- *
- *	get_sb_bdev() does a blkdev_get for us on the data device, hence
- *	the do_blkdev_get argument.
- */
-pb_target_t *
-pagebuf_lock_enable(
-	dev_t			dev,
-	int			do_blkdev_get)
-{
-	struct block_device	*bdev;
-	pb_target_t		*target;
-	int			error = -ENOMEM;
-
-	target = kmalloc(sizeof(pb_target_t), GFP_KERNEL);
-	if (unlikely(!target))
-		return ERR_PTR(error);
-
-	bdev = bdget(dev);
-	if (unlikely(!bdev))
-		goto fail;
-
-	if (do_blkdev_get) {
-		error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_FS);
-		if (unlikely(error))
-			goto fail;
-	}
-
-	target->pbr_dev = dev;
-	target->pbr_bdev = bdev;
-	target->pbr_mapping = bdev->bd_inode->i_mapping;
-
-	pagebuf_target_blocksize(target, PAGE_CACHE_SIZE);
-	
-	if ((MAJOR(dev) == MD_MAJOR) || (MAJOR(dev) == EVMS_MAJOR))
-		target->pbr_flags = PBR_ALIGNED_ONLY;
-	else if (MAJOR(dev) == LVM_BLK_MAJOR)
-		target->pbr_flags = PBR_SECTOR_ONLY;
-	else
-		target->pbr_flags = 0;
-
-	return target;
-
-fail:
-	kfree(target);
-	return ERR_PTR(error);
-}
-
-void
-pagebuf_target_blocksize(
-	pb_target_t		*target,
-	unsigned int		blocksize)
-{
-	target->pbr_blocksize = blocksize;
-	target->pbr_blocksize_bits = ffs(blocksize) - 1;
-}
-
 void
 pagebuf_target_clear(
 	pb_target_t		*target)
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 795056094248..d6fe5d8b6983 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -40,6 +40,11 @@ int			doass = 1;
 static char		message[256];	/* keep it off the stack */
 static spinlock_t 	xfs_err_lock = SPIN_LOCK_UNLOCKED;
 
+/* Translate from CE_FOO to KERN_FOO: err_level[CE_FOO] == KERN_FOO. */
+static char		*err_level[8] = {KERN_EMERG, KERN_ALERT, KERN_CRIT,
+					 KERN_ERR, KERN_WARNING, KERN_NOTICE,
+					 KERN_INFO, KERN_DEBUG};
+
 void
 assfail(char *a, char *f, int l)
 {
@@ -71,10 +76,7 @@ get_thread_id(void)
 	return current->pid;
 }
 
-# define xdprintk(format...)	printk(format)
-#else
-# define xdprintk(format...)	do { } while (0)
-#endif
+#endif /* DEBUG */
 
 void
 cmn_err(register int level, char *fmt, ...)
@@ -86,18 +88,7 @@ cmn_err(register int level, char *fmt, ...)
 	va_start(ap, fmt);
 	if (*fmt == '!') fp++;
 	vsprintf(message, fp, ap);
-	switch (level) {
-	case CE_CONT:
-	case CE_WARN:
-		printk("%s", message);
-		break;
-	case CE_DEBUG:
-		xdprintk("%s", message);
-		break;
-	default:
-		printk("%s\n", message);
-		break;
-	}
+	printk("%s%s\n", err_level[level], message);
 	va_end(ap);
 	spin_unlock(&xfs_err_lock);
 
@@ -111,18 +102,10 @@ icmn_err(register int level, char *fmt, va_list ap)
 {
 	spin_lock(&xfs_err_lock);
 	vsprintf(message, fmt, ap);
-	switch (level) {
-	case CE_CONT:
-	case CE_WARN:
-		printk("%s", message);
-		break;
-	case CE_DEBUG:
-		xdprintk("%s", message);
-		break;
-	default:
-		printk("cmn_err level %d ", level);
-		printk("%s\n", message);
-		break;
-	}
+	/* "message" is a static buffer guarded by xfs_err_lock, so it */
+	/* must be printed before the lock is dropped (as in cmn_err). */
+	printk("%s%s\n", err_level[level], message);
 	spin_unlock(&xfs_err_lock);
+	if (level == CE_PANIC)
+		BUG();
 }
diff --git a/fs/xfs/support/move.c b/fs/xfs/support/move.c
index 4fc3831eed38..15dbd090c6b8 100644
--- a/fs/xfs/support/move.c
+++ b/fs/xfs/support/move.c
@@ -72,9 +72,9 @@ uiomove(void *cp, size_t n, enum uio_rw rw, struct uio *uio)
 
 		case UIO_SYSSPACE:
 			if (rw == UIO_READ)
-				bcopy(cp, iov->iov_base, cnt);
+				memcpy(iov->iov_base, cp, cnt);
 			else
-				bcopy(iov->iov_base, cp, cnt);
+				memcpy(cp, iov->iov_base, cnt);
 			break;
 
 		default:
diff --git a/fs/xfs/support/move.h b/fs/xfs/support/move.h
index e01b7b6c7a15..dd63285df2e6 100644
--- a/fs/xfs/support/move.h
+++ b/fs/xfs/support/move.h
@@ -36,11 +36,6 @@
 #include <linux/uio.h>
 #include <asm/uaccess.h>
 
-#define bzero(p,s)	memset((p), 0, (s))
-#define bcopy(s,d,n)	memcpy((d),(s),(n))
-#define bcmp(s1,s2,l)	memcmp(s1,s2,l)
-#define ovbcopy(from,to,count)	memmove(to,from,count)
-
 typedef struct iovec iovec_t;
 
 typedef struct uio {
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index f30b857a3f9a..fd98101312c1 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -109,7 +109,7 @@ uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
 void
 uuid_create_nil(uuid_t *uuid)
 {
-	bzero(uuid, sizeof *uuid);
+	memset(uuid, 0, sizeof(*uuid));
 }
 
 int
@@ -129,7 +129,7 @@ uuid_is_nil(uuid_t *uuid)
 int
 uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
 {
-	return bcmp(uuid1, uuid2, sizeof(uuid_t)) ? B_FALSE : B_TRUE;
+	return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? B_FALSE : B_TRUE;
 }
 
 /*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4489ce3a1502..5c89a956c866 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -494,13 +494,13 @@ xfs_alloc_trace_modagf(
 		(void *)(__psunsigned_t)INT_GET(agf->agf_seqno, ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_length, ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_BNO],
-						ARCH_CONVERT);
+						ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_CNT],
-						ARCH_CONVERT);
+						ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_BNO],
-						ARCH_CONVERT);
+						ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_CNT],
-						ARCH_CONVERT);
+						ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_flfirst, ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_fllast, ARCH_CONVERT),
 		(void *)(__psunsigned_t)INT_GET(agf->agf_flcount, ARCH_CONVERT),
@@ -2597,7 +2597,7 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
 	cnt = mp->m_perag[agno].pagb_count;
 
-	uend = bno + len;
+	uend = bno + len - 1;
 
 	/* search pagb_list for this slot, skipping open slots */
 	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
@@ -2607,16 +2607,16 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 		 * (start1,length1) within (start2, length2)
 		 */
 		if (bsy->busy_tp != NULL) {
-			bend = bsy->busy_start + bsy->busy_length;
-			if ( (bno >= bsy->busy_start && bno <= bend) ||
-			     (uend >= bsy->busy_start && uend <= bend) ||
-			     (bno <= bsy->busy_start && uend >= bsy->busy_start) ) {
+			bend = bsy->busy_start + bsy->busy_length - 1;
+			if ((bno > bend) ||
+			    (uend < bsy->busy_start)) {
+				cnt--;
+			} else {
 				TRACE_BUSYSEARCH("xfs_alloc_search_busy",
 						 "found1", agno, bno, len, n,
 						 tp);
 				break;
 			}
-			cnt--;
 		}
 	}
 
@@ -2626,7 +2626,7 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 	 */
 	if (cnt) {
 		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp);
-		lsn = bsy->busy_tp->t_lsn;
+		lsn = bsy->busy_tp->t_commit_lsn;
 		mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
 		xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
 	} else {
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 11c7618be7c7..9bb3fe79243d 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -132,9 +132,9 @@ xfs_alloc_delrec(
 		}
 #endif
 		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
-			ovbcopy(&lkp[ptr], &lkp[ptr - 1],
+			memmove(&lkp[ptr - 1], &lkp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lkp)); /* INT_: mem copy */
-			ovbcopy(&lpp[ptr], &lpp[ptr - 1],
+			memmove(&lpp[ptr - 1], &lpp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lpp)); /* INT_: mem copy */
 			xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
 			xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
@@ -147,7 +147,7 @@ xfs_alloc_delrec(
 	else {
 		lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
 		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
-			ovbcopy(&lrp[ptr], &lrp[ptr - 1],
+			memmove(&lrp[ptr - 1], &lrp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lrp));
 			xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
 		}
@@ -464,8 +464,8 @@ xfs_alloc_delrec(
 				return error;
 		}
 #endif
-		bcopy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); /* INT_: structure copy */
-		bcopy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); /* INT_: structure copy */
+		memcpy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); /* INT_: structure copy */
+		memcpy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); /* INT_: structure copy */
 		xfs_alloc_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
 				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_alloc_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
@@ -476,7 +476,7 @@ xfs_alloc_delrec(
 		 */
 		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
 		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		bcopy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
+		memcpy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
 		xfs_alloc_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
 				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
 	}
@@ -697,9 +697,9 @@ xfs_alloc_insrec(
 				return error;
 		}
 #endif
-		ovbcopy(&kp[ptr - 1], &kp[ptr],
+		memmove(&kp[ptr], &kp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp)); /* INT_: copy */
-		ovbcopy(&pp[ptr - 1], &pp[ptr],
+		memmove(&pp[ptr], &pp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp)); /* INT_: copy */
 #ifdef DEBUG
 		if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
@@ -723,7 +723,7 @@ xfs_alloc_insrec(
 		 * It's a leaf entry.  Make a hole for the new record.
 		 */
 		rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
-		ovbcopy(&rp[ptr - 1], &rp[ptr],
+		memmove(&rp[ptr], &rp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
 		/*
 		 * Now stuff the new record in, bump numrecs
@@ -1217,12 +1217,12 @@ xfs_alloc_lshift(
 				return error;
 		}
 #endif
-		ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memmove(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 	} else {
-		ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
 		key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
@@ -1475,8 +1475,8 @@ xfs_alloc_rshift(
 				return error;
 		}
 #endif
-		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 #ifdef DEBUG
 		if ((error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)))
 			return error;
@@ -1492,7 +1492,7 @@ xfs_alloc_rshift(
 
 		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
 		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		*rrp = *lrp;
 		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
 		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
@@ -1608,8 +1608,8 @@ xfs_alloc_split(
 				return error;
 		}
 #endif
-		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); /* INT_: copy */
-		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));/* INT_: copy */
+		memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); /* INT_: copy */
+		memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp)); /* INT_: copy */
 		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		*keyp = *rkp;
@@ -1623,7 +1623,7 @@ xfs_alloc_split(
 
 		lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
 		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		keyp->ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
 		keyp->ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 57a28544be96..4629bc745e07 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -260,13 +260,13 @@
 
 #define DIRINO_COPY_ARCH(from,to,arch) \
     if ((arch) == ARCH_NOCONVERT) { \
-	bcopy(from,to,sizeof(xfs_ino_t)); \
+	memcpy(to,from,sizeof(xfs_ino_t)); \
     } else { \
 	INT_SWAP_UNALIGNED_64(from,to); \
     }
 #define DIRINO4_COPY_ARCH(from,to,arch) \
     if ((arch) == ARCH_NOCONVERT) { \
-	bcopy((((__u8*)from+4)),to,sizeof(xfs_dir2_ino4_t)); \
+	memcpy(to,(((__u8*)from+4)),sizeof(xfs_dir2_ino4_t)); \
     } else { \
 	INT_SWAP_UNALIGNED_32(from,to); \
     }
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 74563b62d3fc..482a20fcbfde 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -120,7 +120,7 @@ xfs_attr_get(bhv_desc_t *bdp, char *name, char *value, int *valuelenp,
 	/*
 	 * Fill in the arg structure for this request.
 	 */
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.name = name;
 	args.namelen = namelen;
 	args.value = value;
@@ -215,7 +215,7 @@ xfs_attr_set(bhv_desc_t *bdp, char *name, char *value, int valuelen, int flags,
 	/*
 	 * Fill in the arg structure for this request.
 	 */
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.name = name;
 	args.namelen = namelen;
 	args.value = value;
@@ -469,7 +469,7 @@ xfs_attr_remove(bhv_desc_t *bdp, char *name, int flags, struct cred *cred)
 	/*
 	 * Fill in the arg structure for this request.
 	 */
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.name = name;
 	args.namelen = namelen;
 	args.flags = flags;
diff --git a/fs/xfs/xfs_attr_fetch.c b/fs/xfs/xfs_attr_fetch.c
index 0c9af54eeed5..4b1a23cb21a6 100644
--- a/fs/xfs/xfs_attr_fetch.c
+++ b/fs/xfs/xfs_attr_fetch.c
@@ -43,7 +43,7 @@ xfs_attr_fetch(xfs_inode_t *ip, char *name, char *value, int valuelen)
 	/*
 	 * Do the argument setup for the xfs_attr routines.
 	 */
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.dp = ip;
 	args.flags = ATTR_ROOT;
 	args.whichfork = XFS_ATTR_FORK;
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 884da53fa54d..b1c4836d6709 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -128,7 +128,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args)
 				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
 		if (sfe->namelen != args->namelen)
 			continue;
-		if (bcmp(args->name, sfe->nameval, args->namelen) != 0)
+		if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
 			continue;
 		if (((args->flags & ATTR_ROOT) != 0) !=
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
@@ -145,8 +145,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args)
 	sfe->namelen = args->namelen;
 	INT_SET(sfe->valuelen, ARCH_CONVERT, args->valuelen);
 	sfe->flags = (args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0;
-	bcopy(args->name, sfe->nameval, args->namelen);
-	bcopy(args->value, &sfe->nameval[args->namelen], args->valuelen);
+	memcpy(sfe->nameval, args->name, args->namelen);
+	memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen);
 	INT_MOD(sf->hdr.count, ARCH_CONVERT, 1);
 	INT_MOD(sf->hdr.totsize, ARCH_CONVERT, size);
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
@@ -178,7 +178,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 		size = XFS_ATTR_SF_ENTSIZE(sfe);
 		if (sfe->namelen != args->namelen)
 			continue;
-		if (bcmp(sfe->nameval, args->name, args->namelen) != 0)
+		if (memcmp(sfe->nameval, args->name, args->namelen) != 0)
 			continue;
 		if (((args->flags & ATTR_ROOT) != 0) !=
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
@@ -191,7 +191,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	end = base + size;
 	totsize = INT_GET(sf->hdr.totsize, ARCH_CONVERT);
 	if (end != totsize) {
-		ovbcopy(&((char *)sf)[end], &((char *)sf)[base],
+		memmove(&((char *)sf)[base], &((char *)sf)[end],
 							totsize - end);
 	}
 	INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
@@ -222,7 +222,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
 				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
 		if (sfe->namelen != args->namelen)
 			continue;
-		if (bcmp(args->name, sfe->nameval, args->namelen) != 0)
+		if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
 			continue;
 		if (((args->flags & ATTR_ROOT) != 0) !=
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
@@ -250,7 +250,7 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
 		if (sfe->namelen != args->namelen)
 			continue;
-		if (bcmp(args->name, sfe->nameval, args->namelen) != 0)
+		if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
 			continue;
 		if (((args->flags & ATTR_ROOT) != 0) !=
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
@@ -264,7 +264,7 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 			return(XFS_ERROR(ERANGE));
 		}
 		args->valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT);
-		bcopy(&sfe->nameval[args->namelen], args->value,
+		memcpy(args->value, &sfe->nameval[args->namelen],
 						    args->valuelen);
 		return(XFS_ERROR(EEXIST));
 	}
@@ -293,7 +293,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 	size = INT_GET(sf->hdr.totsize, ARCH_CONVERT);
 	tmpbuffer = kmem_alloc(size, KM_SLEEP);
 	ASSERT(tmpbuffer != NULL);
-	bcopy(ifp->if_u1.if_data, tmpbuffer, size);
+	memcpy(tmpbuffer, ifp->if_u1.if_data, size);
 	sf = (xfs_attr_shortform_t *)tmpbuffer;
 
 	xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
@@ -307,7 +307,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 		if (error == EIO)
 			goto out;
 		xfs_idata_realloc(dp, size, XFS_ATTR_FORK);	/* try to put */
-		bcopy(tmpbuffer, ifp->if_u1.if_data, size);	/* it back */
+		memcpy(ifp->if_u1.if_data, tmpbuffer, size);	/* it back */
 		goto out;
 	}
 
@@ -319,11 +319,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 		if (error)
 			goto out;
 		xfs_idata_realloc(dp, size, XFS_ATTR_FORK);	/* try to put */
-		bcopy(tmpbuffer, ifp->if_u1.if_data, size);	/* it back */
+		memcpy(ifp->if_u1.if_data, tmpbuffer, size);	/* it back */
 		goto out;
 	}
 
-	bzero((char *)&nargs, sizeof(nargs));
+	memset((char *)&nargs, 0, sizeof(nargs));
 	nargs.dp = dp;
 	nargs.firstblock = args->firstblock;
 	nargs.flist = args->flist;
@@ -590,11 +590,11 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	ASSERT(tmpbuffer != NULL);
 
 	ASSERT(bp != NULL);
-	bcopy(bp->data, tmpbuffer, XFS_LBSIZE(dp->i_mount));
+	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
 	leaf = (xfs_attr_leafblock_t *)tmpbuffer;
 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
 						== XFS_ATTR_LEAF_MAGIC);
-	bzero(bp->data, XFS_LBSIZE(dp->i_mount));
+	memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
 
 	/*
 	 * Clean out the prior contents of the attribute list.
@@ -609,7 +609,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	/*
 	 * Copy the attributes
 	 */
-	bzero((char *)&nargs, sizeof(nargs));
+	memset((char *)&nargs, 0, sizeof(nargs));
 	nargs.dp = dp;
 	nargs.firstblock = args->firstblock;
 	nargs.flist = args->flist;
@@ -669,7 +669,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
 	if (error)
 		goto out;
 	ASSERT(bp2 != NULL);
-	bcopy(bp1->data, bp2->data, XFS_LBSIZE(dp->i_mount));
+	memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
 	xfs_da_buf_done(bp1);
 	bp1 = NULL;
 	xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
@@ -725,7 +725,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 		return(error);
 	ASSERT(bp != NULL);
 	leaf = bp->data;
-	bzero((char *)leaf, XFS_LBSIZE(dp->i_mount));
+	memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
 	hdr = &leaf->hdr;
 	INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_ATTR_LEAF_MAGIC);
 	INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
@@ -900,7 +900,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	if (args->index < INT_GET(hdr->count, ARCH_CONVERT)) {
 		tmp  = INT_GET(hdr->count, ARCH_CONVERT) - args->index;
 		tmp *= sizeof(xfs_attr_leaf_entry_t);
-		ovbcopy((char *)entry, (char *)(entry+1), tmp);
+		memmove((char *)(entry+1), (char *)entry, tmp);
 		xfs_da_log_buf(args->trans, bp,
 		    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
 	}
@@ -955,13 +955,13 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 		name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index);
 		name_loc->namelen = args->namelen;
 		INT_SET(name_loc->valuelen, ARCH_CONVERT, args->valuelen);
-		bcopy(args->name, (char *)name_loc->nameval, args->namelen);
-		bcopy(args->value, (char *)&name_loc->nameval[args->namelen],
+		memcpy((char *)name_loc->nameval, args->name, args->namelen);
+		memcpy((char *)&name_loc->nameval[args->namelen], args->value,
 				   INT_GET(name_loc->valuelen, ARCH_CONVERT));
 	} else {
 		name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index);
 		name_rmt->namelen = args->namelen;
-		bcopy(args->name, (char *)name_rmt->name, args->namelen);
+		memcpy((char *)name_rmt->name, args->name, args->namelen);
 		entry->flags |= XFS_ATTR_INCOMPLETE;
 		/* just in case */
 		INT_ZERO(name_rmt->valuelen, ARCH_CONVERT);
@@ -1017,8 +1017,8 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
 	mp = trans->t_mountp;
 	tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
 	ASSERT(tmpbuffer != NULL);
-	bcopy(bp->data, tmpbuffer, XFS_LBSIZE(mp));
-	bzero(bp->data, XFS_LBSIZE(mp));
+	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(mp));
+	memset(bp->data, 0, XFS_LBSIZE(mp));
 
 	/*
 	 * Copy basic information
@@ -1390,7 +1390,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 		 * path point to the block we want to drop (this one).
 		 */
 		forward = (!INT_ISZERO(info->forw, ARCH_CONVERT));
-		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
 		if (error)
@@ -1450,7 +1450,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 	 * Make altpath point to the block we want to keep (the lower
 	 * numbered block) and path point to the block we want to drop.
 	 */
-	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	memcpy(&state->altpath, &state->path, sizeof(state->path));
 	if (blkno < blk->blkno) {
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
@@ -1585,7 +1585,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	/*
 	 * Compress the remaining entries and zero out the removed stuff.
 	 */
-	bzero(XFS_ATTR_LEAF_NAME(leaf, args->index), entsize);
+	memset(XFS_ATTR_LEAF_NAME(leaf, args->index), 0, entsize);
 	INT_MOD(hdr->usedbytes, ARCH_CONVERT, -entsize);
 	xfs_da_log_buf(args->trans, bp,
 	     XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index),
@@ -1593,12 +1593,12 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 
 	tmp = (INT_GET(hdr->count, ARCH_CONVERT) - args->index)
 					* sizeof(xfs_attr_leaf_entry_t);
-	ovbcopy((char *)(entry+1), (char *)entry, tmp);
+	memmove((char *)entry, (char *)(entry+1), tmp);
 	INT_MOD(hdr->count, ARCH_CONVERT, -1);
 	xfs_da_log_buf(args->trans, bp,
 	    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
 	entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)];
-	bzero((char *)entry, sizeof(xfs_attr_leaf_entry_t));
+	memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t));
 
 	/*
 	 * If we removed the first entry, re-find the first used byte
@@ -1701,7 +1701,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 		 */
 		tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
 		ASSERT(tmpbuffer != NULL);
-		bzero(tmpbuffer, state->blocksize);
+		memset(tmpbuffer, 0, state->blocksize);
 		tmp_leaf = (xfs_attr_leafblock_t *)tmpbuffer;
 		tmp_hdr = &tmp_leaf->hdr;
 		tmp_hdr->info = save_hdr->info; /* struct copy */
@@ -1729,7 +1729,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 				(int)INT_GET(drop_hdr->count, ARCH_CONVERT),
 				mp);
 		}
-		bcopy((char *)tmp_leaf, (char *)save_leaf, state->blocksize);
+		memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize);
 		kmem_free(tmpbuffer, state->blocksize);
 	}
 
@@ -1840,7 +1840,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 			name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, probe);
 			if (name_loc->namelen != args->namelen)
 				continue;
-			if (bcmp(args->name, (char *)name_loc->nameval,
+			if (memcmp(args->name, (char *)name_loc->nameval,
 					     args->namelen) != 0)
 				continue;
 			if (((args->flags & ATTR_ROOT) != 0) !=
@@ -1852,7 +1852,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 			name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, probe);
 			if (name_rmt->namelen != args->namelen)
 				continue;
-			if (bcmp(args->name, (char *)name_rmt->name,
+			if (memcmp(args->name, (char *)name_rmt->name,
 					     args->namelen) != 0)
 				continue;
 			if (((args->flags & ATTR_ROOT) != 0) !=
@@ -1895,7 +1895,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	if (entry->flags & XFS_ATTR_LOCAL) {
 		name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index);
 		ASSERT(name_loc->namelen == args->namelen);
-		ASSERT(bcmp(args->name, name_loc->nameval, args->namelen) == 0);
+		ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0);
 		valuelen = INT_GET(name_loc->valuelen, ARCH_CONVERT);
 		if (args->flags & ATTR_KERNOVAL) {
 			args->valuelen = valuelen;
@@ -1906,11 +1906,11 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
 			return(XFS_ERROR(ERANGE));
 		}
 		args->valuelen = valuelen;
-		bcopy(&name_loc->nameval[args->namelen], args->value, valuelen);
+		memcpy(args->value, &name_loc->nameval[args->namelen], valuelen);
 	} else {
 		name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index);
 		ASSERT(name_rmt->namelen == args->namelen);
-		ASSERT(bcmp(args->name, name_rmt->name, args->namelen) == 0);
+		ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
 		valuelen = INT_GET(name_rmt->valuelen, ARCH_CONVERT);
 		args->rmtblkno = INT_GET(name_rmt->valueblk, ARCH_CONVERT);
 		args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen);
@@ -1983,7 +1983,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 		tmp *= sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &leaf_d->entries[start_d];
 		entry_d = &leaf_d->entries[start_d + count];
-		ovbcopy((char *)entry_s, (char *)entry_d, tmp);
+		memmove((char *)entry_d, (char *)entry_s, tmp);
 	}
 
 	/*
@@ -2004,7 +2004,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 		 * off for 6.2, should be revisited later.
 		 */
 		if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */
-			bzero(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
+			memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp);
 			INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp);
 			INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
 			entry_d--;	/* to compensate for ++ in loop hdr */
@@ -2021,11 +2021,11 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 			entry_d->flags = entry_s->flags;
 			ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp
 							<= XFS_LBSIZE(mp));
-			ovbcopy(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i),
-			      XFS_ATTR_LEAF_NAME(leaf_d, desti), tmp);
+			memmove(XFS_ATTR_LEAF_NAME(leaf_d, desti),
+				XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
 			ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp
 							<= XFS_LBSIZE(mp));
-			bzero(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
+			memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp);
 			INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp);
 			INT_MOD(hdr_d->usedbytes, ARCH_CONVERT, tmp);
 			INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
@@ -2047,7 +2047,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 		entry_s = &leaf_s->entries[start_s];
 		ASSERT(((char *)entry_s + tmp) <=
 		       ((char *)leaf_s + XFS_LBSIZE(mp)));
-		bzero((char *)entry_s, tmp);
+		memset((char *)entry_s, 0, tmp);
 	} else {
 		/*
 		 * Move the remaining entries down to fill the hole,
@@ -2057,14 +2057,14 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 		tmp *= sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &leaf_s->entries[start_s + count];
 		entry_d = &leaf_s->entries[start_s];
-		ovbcopy((char *)entry_s, (char *)entry_d, tmp);
+		memmove((char *)entry_d, (char *)entry_s, tmp);
 
 		tmp = count * sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &leaf_s->entries[INT_GET(hdr_s->count,
 							ARCH_CONVERT)];
 		ASSERT(((char *)entry_s + tmp) <=
 		       ((char *)leaf_s + XFS_LBSIZE(mp)));
-		bzero((char *)entry_s, tmp);
+		memset((char *)entry_s, 0, tmp);
 	}
 
 	/*
@@ -2345,7 +2345,7 @@ xfs_attr_put_listent(xfs_attr_list_context_t *context,
 
 	aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
 	aep->a_valuelen = valuelen;
-	bcopy(name, aep->a_name, namelen);
+	memcpy(aep->a_name, name, namelen);
 	aep->a_name[ namelen ] = 0;
 	context->alist->al_offset[ context->count++ ] = context->firstu;
 	context->alist->al_count = context->count;
@@ -2404,7 +2404,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 	}
 	ASSERT(INT_GET(entry->hashval, ARCH_CONVERT) == args->hashval);
 	ASSERT(namelen == args->namelen);
-	ASSERT(bcmp(name, args->name, namelen) == 0);
+	ASSERT(memcmp(name, args->name, namelen) == 0);
 #endif /* DEBUG */
 
 	entry->flags &= ~XFS_ATTR_INCOMPLETE;
@@ -2559,7 +2559,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
 	}
 	ASSERT(INT_GET(entry1->hashval, ARCH_CONVERT) == INT_GET(entry2->hashval, ARCH_CONVERT));
 	ASSERT(namelen1 == namelen2);
-	ASSERT(bcmp(name1, name2, namelen1) == 0);
+	ASSERT(memcmp(name1, name2, namelen1) == 0);
 #endif /* DEBUG */
 
 	ASSERT(entry1->flags & XFS_ATTR_INCOMPLETE);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 6bf9632238e7..d89a4a83d611 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -489,7 +489,7 @@ xfs_bmap_add_attrfork_local(
 		return 0;
 	if ((ip->i_d.di_mode & IFMT) == IFDIR) {
 		mp = ip->i_mount;
-		bzero(&dargs, sizeof(dargs));
+		memset(&dargs, 0, sizeof(dargs));
 		dargs.dp = ip;
 		dargs.firstblock = firstblock;
 		dargs.flist = flist;
@@ -3146,7 +3146,7 @@ xfs_bmap_delete_exlist(
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 	base = ifp->if_u1.if_extents;
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - count;
-	ovbcopy(&base[idx + count], &base[idx],
+	memmove(&base[idx], &base[idx + count],
 		(nextents - idx) * sizeof(*base));
 	xfs_iext_realloc(ip, -count, whichfork);
 }
@@ -3174,7 +3174,7 @@ xfs_bmap_extents_to_btree(
 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
 	xfs_bmbt_rec_t		*ep;		/* extent list pointer */
 	int			error;		/* error return value */
-	xfs_extnum_t		i;		/* extent list index */
+	xfs_extnum_t		i, cnt;		/* extent list index */
 	xfs_ifork_t		*ifp;		/* inode fork pointer */
 	xfs_bmbt_key_t		*kp;		/* root block key pointer */
 	xfs_mount_t		*mp;		/* mount structure */
@@ -3256,24 +3256,25 @@ xfs_bmap_extents_to_btree(
 	ablock = XFS_BUF_TO_BMBT_BLOCK(abp);
 	INT_SET(ablock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
 	INT_ZERO(ablock->bb_level, ARCH_CONVERT);
-	INT_ZERO(ablock->bb_numrecs, ARCH_CONVERT);
 	INT_SET(ablock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
 	INT_SET(ablock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
 	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	for (ep = ifp->if_u1.if_extents, i = 0; i < nextents; i++, ep++) {
+	for (ep = ifp->if_u1.if_extents, cnt = i = 0; i < nextents; i++, ep++) {
 		if (!ISNULLSTARTBLOCK(xfs_bmbt_get_startblock(ep))) {
-			*arp++ = *ep;
-			INT_MOD(ablock->bb_numrecs, ARCH_CONVERT, +1);
+			arp->l0 = INT_GET(ep->l0, ARCH_CONVERT);
+			arp->l1 = INT_GET(ep->l1, ARCH_CONVERT);
+			arp++; cnt++;
 		}
 	}
+	INT_SET(ablock->bb_numrecs, ARCH_CONVERT, cnt);
 	ASSERT(INT_GET(ablock->bb_numrecs, ARCH_CONVERT) == XFS_IFORK_NEXTENTS(ip, whichfork));
 	/*
 	 * Fill in the root key and pointer.
 	 */
 	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
 	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
-	INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(arp));
+	INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(arp));
 	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
 	INT_SET(*pp, ARCH_CONVERT, args.fsbno);
 	/*
@@ -3310,7 +3311,7 @@ xfs_bmap_insert_exlist(
 	xfs_iext_realloc(ip, count, whichfork);
 	base = ifp->if_u1.if_extents;
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	ovbcopy(&base[idx], &base[idx + count],
+	memmove(&base[idx + count], &base[idx],
 		(nextents - (idx + count)) * sizeof(*base));
 	for (to = idx; to < idx + count; to++, new++)
 		xfs_bmbt_set_all(&base[to], new);
@@ -3380,7 +3381,7 @@ xfs_bmap_local_to_extents(
 		ASSERT(args.len == 1);
 		*firstblock = args.fsbno;
 		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
-		bcopy(ifp->if_u1.if_data, (char *)XFS_BUF_PTR(bp),
+		memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data,
 			ifp->if_bytes);
 		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
 		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -3556,7 +3557,7 @@ xfs_bmap_trace_addentry(
 	if (cnt == 1) {
 		ASSERT(r2 == NULL);
 		r2 = &tr2;
-		bzero(&tr2, sizeof(tr2));
+		memset(&tr2, 0, sizeof(tr2));
 	} else
 		ASSERT(r2 != NULL);
 	ktrace_enter(xfs_bmap_trace_buf,
@@ -4332,7 +4333,7 @@ xfs_bmap_read_extents(
 #ifdef XFS_BMAP_TRACE
 	static char		fname[] = "xfs_bmap_read_extents";
 #endif
-	xfs_extnum_t		i;	/* index into the extents list */
+	xfs_extnum_t		i, j;	/* index into the extents list */
 	xfs_ifork_t		*ifp;	/* fork structure */
 	int			level;	/* btree level, for checking */
 	xfs_mount_t		*mp;	/* file system mount structure */
@@ -4373,28 +4374,9 @@ xfs_bmap_read_extents(
 			break;
 		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
 			1, mp->m_bmap_dmxr[1]);
-#ifndef __KERNEL__
 		XFS_WANT_CORRUPTED_GOTO(
 			XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)),
 			error0);
-#else	/* additional, temporary, debugging code */
-		if (!(XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)))) {
-			cmn_err(CE_NOTE,
-			"xfs_bmap_read_extents: FSB Sanity Check:");
-			if (!(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount))
-				cmn_err(CE_NOTE,
-					"bad AG count %d < agcount %d",
-					XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)),
-					mp->m_sb.sb_agcount);
-			if (!(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks))
-				cmn_err(CE_NOTE,
-					"bad AG BNO %d < %d",
-					XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)),
-					mp->m_sb.sb_agblocks);
-			error = XFS_ERROR(EFSCORRUPTED);
-			goto error0;
-		}
-#endif
 		bno = INT_GET(*pp, ARCH_CONVERT);
 		xfs_trans_brelse(tp, bp);
 	}
@@ -4408,7 +4390,7 @@ xfs_bmap_read_extents(
 	 * Loop over all leaf nodes.  Copy information to the extent list.
 	 */
 	for (;;) {
-		xfs_bmbt_rec_t	*frp;
+		xfs_bmbt_rec_t	*frp, *temp;
 		xfs_fsblock_t	nextbno;
 		xfs_extnum_t	num_recs;
 
@@ -4422,35 +4404,9 @@ xfs_bmap_read_extents(
 				(unsigned long long) ip->i_ino);
 			goto error0;
 		}
-#ifndef __KERNEL__
 		XFS_WANT_CORRUPTED_GOTO(
 			XFS_BMAP_SANITY_CHECK(mp, block, 0),
 			error0);
-#else	/* additional, temporary, debugging code */
-		if (!(XFS_BMAP_SANITY_CHECK(mp, block, 0))) {
-			cmn_err(CE_NOTE,
-			"xfs_bmap_read_extents: BMAP Sanity Check:");
-			if (!(INT_GET(block->bb_magic, ARCH_CONVERT) == XFS_BMAP_MAGIC))
-				cmn_err(CE_NOTE,
-					"bb_magic 0x%x",
-					INT_GET(block->bb_magic, ARCH_CONVERT));
-			if (!(INT_GET(block->bb_level, ARCH_CONVERT) == level))
-				cmn_err(CE_NOTE,
-					"bb_level %d",
-					INT_GET(block->bb_level, ARCH_CONVERT));
-			if (!(INT_GET(block->bb_numrecs, ARCH_CONVERT) > 0))
-				cmn_err(CE_NOTE,
-					"bb_numrecs %d",
-					INT_GET(block->bb_numrecs, ARCH_CONVERT));
-			if (!(INT_GET(block->bb_numrecs, ARCH_CONVERT) <= (mp)->m_bmap_dmxr[(level) != 0]))
-				cmn_err(CE_NOTE,
-					"bb_numrecs %d < m_bmap_dmxr[] %d",
-					INT_GET(block->bb_numrecs, ARCH_CONVERT),
-					(mp)->m_bmap_dmxr[(level) != 0]);
-			error = XFS_ERROR(EFSCORRUPTED);
-			goto error0;
-		}
-#endif
 		/*
 		 * Read-ahead the next leaf block, if any.
 		 */
@@ -4462,18 +4418,21 @@ xfs_bmap_read_extents(
 		 */
 		frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
 			block, 1, mp->m_bmap_dmxr[0]);
-		bcopy(frp, trp, num_recs * sizeof(*frp));
+		temp = trp;
+		for (j = 0; j < num_recs; j++, frp++, trp++) {
+			trp->l0 = INT_GET(frp->l0, ARCH_CONVERT);
+			trp->l1 = INT_GET(frp->l1, ARCH_CONVERT);
+		}
 		if (exntf == XFS_EXTFMT_NOSTATE) {
 			/*
 			 * Check all attribute bmap btree records and
 			 * any "older" data bmap btree records for a
 			 * set bit in the "extent flag" position.
 			 */
-			if (xfs_check_nostate_extents(trp, num_recs)) {
+			if (xfs_check_nostate_extents(temp, num_recs)) {
 				goto error0;
 			}
 		}
-		trp += num_recs;
 		i += num_recs;
 		xfs_trans_brelse(tp, bp);
 		bno = nextbno;
@@ -4650,11 +4609,6 @@ xfs_bmapi(
 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) {
-#ifdef __KERNEL__	/* additional, temporary, debugging code */
-		cmn_err(CE_NOTE,
-			"EFSCORRUPTED returned from file %s line %d",
-			__FILE__, __LINE__);
-#endif
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 	mp = ip->i_mount;
@@ -5150,11 +5104,6 @@ xfs_bmapi_single(
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) {
-#ifdef __KERNEL__	/* additional, temporary, debugging code */
-		cmn_err(CE_NOTE,
-			"EFSCORRUPTED returned from file %s line %d",
-			__FILE__, __LINE__);
-#endif
 	       return XFS_ERROR(EFSCORRUPTED);
 	}
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -5228,11 +5177,6 @@ xfs_bunmapi(
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
-#ifdef __KERNEL__	/* additional, temporary, debugging code */
-		cmn_err(CE_NOTE,
-			"EFSCORRUPTED returned from file %s line %d",
-			__FILE__, __LINE__);
-#endif
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 	mp = ip->i_mount;
@@ -6317,7 +6261,7 @@ xfs_bmap_count_leaves(
 	int		b;
 
 	for ( b = 1; b <= numrecs; b++, frp++)
-		*count += xfs_bmbt_get_blockcount(frp);
+		*count += xfs_bmbt_disk_get_blockcount(frp);
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 5b384f4f9cba..07d513d24f74 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -207,7 +207,7 @@ xfs_bmbt_trace_argifr(
 	xfs_bmbt_irec_t		s;
 
 	d = (xfs_dfsbno_t)f;
-	xfs_bmbt_get_all(r, &s);
+	xfs_bmbt_disk_get_all(r, &s);
 	o = (xfs_dfiloff_t)s.br_startoff;
 	b = (xfs_dfsbno_t)s.br_startblock;
 	c = s.br_blockcount;
@@ -381,9 +381,9 @@ xfs_bmbt_delrec(
 		}
 #endif
 		if (ptr < numrecs) {
-			ovbcopy(&kp[ptr], &kp[ptr - 1],
+			memmove(&kp[ptr - 1], &kp[ptr],
 				(numrecs - ptr) * sizeof(*kp));
-			ovbcopy(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */
+			memmove(&pp[ptr - 1], &pp[ptr], /* INT_: direct copy */
 				(numrecs - ptr) * sizeof(*pp));
 			xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs - 1);
 			xfs_bmbt_log_keys(cur, bp, ptr, numrecs - 1);
@@ -391,12 +391,12 @@ xfs_bmbt_delrec(
 	} else {
 		rp = XFS_BMAP_REC_IADDR(block, 1, cur);
 		if (ptr < numrecs) {
-			ovbcopy(&rp[ptr], &rp[ptr - 1],
+			memmove(&rp[ptr - 1], &rp[ptr],
 				(numrecs - ptr) * sizeof(*rp));
 			xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1);
 		}
 		if (ptr == 1) {
-			INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rp));
+			INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(rp));
 			kp = &key;
 		}
 	}
@@ -619,14 +619,14 @@ xfs_bmbt_delrec(
 			}
 		}
 #endif
-		bcopy(rkp, lkp, numrrecs * sizeof(*lkp));
-		bcopy(rpp, lpp, numrrecs * sizeof(*lpp));
+		memcpy(lkp, rkp, numrrecs * sizeof(*lkp));
+		memcpy(lpp, rpp, numrrecs * sizeof(*lpp));
 		xfs_bmbt_log_keys(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
 		xfs_bmbt_log_ptrs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
 	} else {
 		lrp = XFS_BMAP_REC_IADDR(left, numlrecs + 1, cur);
 		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
-		bcopy(rrp, lrp, numrrecs * sizeof(*lrp));
+		memcpy(lrp, rrp, numrrecs * sizeof(*lrp));
 		xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
 	}
 	INT_MOD(left->bb_numrecs, ARCH_CONVERT, numrrecs);
@@ -711,10 +711,10 @@ xfs_bmbt_get_rec(
 		return 0;
 	}
 	rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
-	*off = xfs_bmbt_get_startoff(rp);
-	*bno = xfs_bmbt_get_startblock(rp);
-	*len = xfs_bmbt_get_blockcount(rp);
-	*state = xfs_bmbt_get_state(rp);
+	*off = xfs_bmbt_disk_get_startoff(rp);
+	*bno = xfs_bmbt_disk_get_startblock(rp);
+	*len = xfs_bmbt_disk_get_blockcount(rp);
+	*state = xfs_bmbt_disk_get_state(rp);
 	*stat = 1;
 	return 0;
 }
@@ -757,7 +757,8 @@ xfs_bmbt_insrec(
 	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
 	XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
 	ncur = (xfs_btree_cur_t *)0;
-	INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(recp));
+	INT_SET(key.br_startoff, ARCH_CONVERT,
+		xfs_bmbt_disk_get_startoff(recp));
 	optr = ptr = cur->bc_ptrs[level];
 	if (ptr == 0) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
@@ -835,7 +836,7 @@ xfs_bmbt_insrec(
 						}
 #endif
 						ptr = cur->bc_ptrs[level];
-						xfs_bmbt_set_allf(&nrec,
+						xfs_bmbt_disk_set_allf(&nrec,
 							nkey.br_startoff, 0, 0,
 							XFS_EXT_NORM);
 					} else {
@@ -861,9 +862,9 @@ xfs_bmbt_insrec(
 			}
 		}
 #endif
-		ovbcopy(&kp[ptr - 1], &kp[ptr],
+		memmove(&kp[ptr], &kp[ptr - 1],
 			(numrecs - ptr + 1) * sizeof(*kp));
-		ovbcopy(&pp[ptr - 1], &pp[ptr], /* INT_: direct copy */
+		memmove(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */
 			(numrecs - ptr + 1) * sizeof(*pp));
 #ifdef DEBUG
 		if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)*bnop,
@@ -880,7 +881,7 @@ xfs_bmbt_insrec(
 		xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs);
 	} else {
 		rp = XFS_BMAP_REC_IADDR(block, 1, cur);
-		ovbcopy(&rp[ptr - 1], &rp[ptr],
+		memmove(&rp[ptr], &rp[ptr - 1],
 			(numrecs - ptr + 1) * sizeof(*rp));
 		rp[ptr - 1] = *recp;
 		numrecs++;
@@ -980,7 +981,7 @@ xfs_bmbt_killroot(
 	ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) == INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
 	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
 	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
-	bcopy(ckp, kp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+	memcpy(kp, ckp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
 	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
 	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
 #ifdef DEBUG
@@ -991,7 +992,7 @@ xfs_bmbt_killroot(
 		}
 	}
 #endif
-	bcopy(cpp, pp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+	memcpy(pp, cpp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
 	xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
 		cur->bc_private.b.flist, cur->bc_mp);
 	if (!async)
@@ -1175,7 +1176,7 @@ xfs_bmbt_lookup(
 					startoff = INT_GET(kkp->br_startoff, ARCH_CONVERT);
 				} else {
 					krp = krbase + keyno - 1;
-					startoff = xfs_bmbt_get_startoff(krp);
+					startoff = xfs_bmbt_disk_get_startoff(krp);
 				}
 				diff = (xfs_sfiloff_t)
 						(startoff - rp->br_startoff);
@@ -1349,14 +1350,15 @@ xfs_bmbt_lshift(
 			}
 		}
 #endif
-		ovbcopy(rkp + 1, rkp, rrecs * sizeof(*rkp));
-		ovbcopy(rpp + 1, rpp, rrecs * sizeof(*rpp));
+		memmove(rkp, rkp + 1, rrecs * sizeof(*rkp));
+		memmove(rpp, rpp + 1, rrecs * sizeof(*rpp));
 		xfs_bmbt_log_keys(cur, rbp, 1, rrecs);
 		xfs_bmbt_log_ptrs(cur, rbp, 1, rrecs);
 	} else {
-		ovbcopy(rrp + 1, rrp, rrecs * sizeof(*rrp));
+		memmove(rrp, rrp + 1, rrecs * sizeof(*rrp));
 		xfs_bmbt_log_recs(cur, rbp, 1, rrecs);
-		INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
+		INT_SET(key.br_startoff, ARCH_CONVERT,
+			xfs_bmbt_disk_get_startoff(rrp));
 		rkp = &key;
 	}
 	if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) {
@@ -1452,8 +1454,8 @@ xfs_bmbt_rshift(
 			}
 		}
 #endif
-		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 #ifdef DEBUG
 		if ((error = xfs_btree_check_lptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))) {
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -1467,10 +1469,11 @@ xfs_bmbt_rshift(
 	} else {
 		lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
 		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
-		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		*rrp = *lrp;
 		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
-		INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
+		INT_SET(key.br_startoff, ARCH_CONVERT,
+			xfs_bmbt_disk_get_startoff(rrp));
 		rkp = &key;
 	}
 	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
@@ -1629,17 +1632,17 @@ xfs_bmbt_split(
 			}
 		}
 #endif
-		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 		xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		keyp->br_startoff = INT_GET(rkp->br_startoff, ARCH_CONVERT);
 	} else {
 		lrp = XFS_BMAP_REC_IADDR(left, i, cur);
 		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
-		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
-		keyp->br_startoff = xfs_bmbt_get_startoff(rrp);
+		keyp->br_startoff = xfs_bmbt_disk_get_startoff(rrp);
 	}
 	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
 	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
@@ -1748,8 +1751,8 @@ xfs_bmdr_to_bmbt(
 	fpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
 	tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
 	dmxr = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);
-	bcopy(fkp, tkp, sizeof(*fkp) * dmxr);
-	bcopy(fpp, tpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
+	memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
+	memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
 }
 
 /*
@@ -1874,17 +1877,16 @@ xfs_bmbt_delete(
  * This code must be in sync with the routines xfs_bmbt_get_startoff,
  * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
  */
-void
-xfs_bmbt_get_all(
-	xfs_bmbt_rec_t	*r,
-	xfs_bmbt_irec_t *s)
+
+static __inline__ void
+__xfs_bmbt_get_all(
+		__uint64_t l0,
+		__uint64_t l1,
+		xfs_bmbt_irec_t *s)
 {
 	int	ext_flag;
 	xfs_exntst_t st;
-	__uint64_t	l0, l1;
 
-	l0 = INT_GET(r->l0, ARCH_CONVERT);
-	l1 = INT_GET(r->l1, ARCH_CONVERT);
 	ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
 	s->br_startoff = ((xfs_fileoff_t)l0 &
 			   XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
@@ -1915,6 +1917,14 @@ xfs_bmbt_get_all(
 	s->br_state = st;
 }
 
+void
+xfs_bmbt_get_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t *s)
+{
+	__xfs_bmbt_get_all(r->l0, r->l1, s);
+}
+
 /*
  * Get the block pointer for the given level of the cursor.
  * Fill in the buffer pointer, if applicable.
@@ -1941,23 +1951,94 @@ xfs_bmbt_get_block(
 }
 
 /*
- * Extract the blockcount field from a bmap extent record.
+ * Extract the blockcount field from an in memory bmap extent record.
  */
 xfs_filblks_t
 xfs_bmbt_get_blockcount(
 	xfs_bmbt_rec_t	*r)
 {
-	return (xfs_filblks_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK64LO(21));
+	return (xfs_filblks_t)(r->l1 & XFS_MASK64LO(21));
 }
 
 /*
- * Extract the startblock field from a bmap extent record.
+ * Extract the startblock field from an in memory bmap extent record.
  */
 xfs_fsblock_t
 xfs_bmbt_get_startblock(
 	xfs_bmbt_rec_t	*r)
 {
 #if XFS_BIG_FILESYSTEMS
+	return (((xfs_fsblock_t)r->l0 & XFS_MASK64LO(9)) << 43) |
+	       (((xfs_fsblock_t)r->l1) >> 21);
+#else
+#ifdef DEBUG
+	xfs_dfsbno_t	b;
+
+	b = (((xfs_dfsbno_t)r->l0 & XFS_MASK64LO(9)) << 43) |
+	    (((xfs_dfsbno_t)r->l1) >> 21);
+	ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+	return (xfs_fsblock_t)b;
+#else	/* !DEBUG */
+	return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21);
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILESYSTEMS */
+}
+
+/*
+ * Extract the startoff field from an in memory bmap extent record.
+ */
+xfs_fileoff_t
+xfs_bmbt_get_startoff(
+	xfs_bmbt_rec_t	*r)
+{
+	return ((xfs_fileoff_t)r->l0 &
+		 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+}
+
+xfs_exntst_t
+xfs_bmbt_get_state(
+	xfs_bmbt_rec_t	*r)
+{
+	int	ext_flag;
+
+	ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN));
+	return xfs_extent_state(xfs_bmbt_get_blockcount(r),
+				ext_flag);
+}
+
+#if ARCH_CONVERT != ARCH_NOCONVERT
+/* Endian flipping versions of the bmbt extraction functions */
+void
+xfs_bmbt_disk_get_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t *s)
+{
+	__uint64_t	l0, l1;
+
+	l0 = INT_GET(r->l0, ARCH_CONVERT);
+	l1 = INT_GET(r->l1, ARCH_CONVERT);
+
+	__xfs_bmbt_get_all(l0, l1, s);
+}
+
+/*
+ * Extract the blockcount field from an on disk bmap extent record.
+ */
+xfs_filblks_t
+xfs_bmbt_disk_get_blockcount(
+	xfs_bmbt_rec_t	*r)
+{
+	return (xfs_filblks_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK64LO(21));
+}
+
+/*
+ * Extract the startblock field from an on disk bmap extent record.
+ */
+xfs_fsblock_t
+xfs_bmbt_disk_get_startblock(
+	xfs_bmbt_rec_t	*r)
+{
+#if XFS_BIG_FILESYSTEMS
 	return (((xfs_fsblock_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
 	       (((xfs_fsblock_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
 #else
@@ -1975,10 +2056,10 @@ xfs_bmbt_get_startblock(
 }
 
 /*
- * Extract the startoff field from a bmap extent record.
+ * Extract the startoff field from a disk format bmap extent record.
  */
 xfs_fileoff_t
-xfs_bmbt_get_startoff(
+xfs_bmbt_disk_get_startoff(
 	xfs_bmbt_rec_t	*r)
 {
 	return ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
@@ -1986,15 +2067,16 @@ xfs_bmbt_get_startoff(
 }
 
 xfs_exntst_t
-xfs_bmbt_get_state(
+xfs_bmbt_disk_get_state(	/* reads l0 through INT_GET(..., ARCH_CONVERT) */
 	xfs_bmbt_rec_t	*r)
 {
 	int	ext_flag;
 
 	ext_flag = (int)((INT_GET(r->l0, ARCH_CONVERT)) >> (64 - BMBT_EXNTFLAG_BITLEN));
-	return xfs_extent_state(xfs_bmbt_get_blockcount(r),
+	return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r),	/* count also fetched via the disk accessor */
 				ext_flag);
 }
+#endif
 
 
 /*
@@ -2103,7 +2185,7 @@ xfs_bmbt_insert(
 	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
 	level = 0;
 	nbno = NULLFSBLOCK;
-	xfs_bmbt_set_all(&nrec, &cur->bc_rec.b);
+	xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
 	ncur = (xfs_btree_cur_t *)0;
 	pcur = cur;
 	do {
@@ -2333,7 +2415,7 @@ xfs_bmbt_newroot(
 	cur->bc_ptrs[level + 1] = 1;
 	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
 	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
-	bcopy(kp, ckp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+	memcpy(ckp, kp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
 	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
 #ifdef DEBUG
 	for (i = 0; i < INT_GET(cblock->bb_numrecs, ARCH_CONVERT); i++) {
@@ -2343,7 +2425,7 @@ xfs_bmbt_newroot(
 		}
 	}
 #endif
-	bcopy(pp, cpp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+	memcpy(cpp, pp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
 #ifdef DEBUG
 	if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)args.fsbno,
 			level))) {
@@ -2388,6 +2470,97 @@ xfs_bmbt_set_all(
 	ASSERT((s->br_startblock & XFS_MASK64HI(12)) == 0);
 #endif	/* XFS_BIG_FILESYSTEMS */
 #if XFS_BIG_FILESYSTEMS
+	r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+		 ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+		 ((xfs_bmbt_rec_base_t)s->br_startblock >> 43);
+	r->l1 = ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+		 ((xfs_bmbt_rec_base_t)s->br_blockcount &
+		 (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(s->br_startblock)) {
+		r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+			  (xfs_bmbt_rec_base_t)XFS_MASK64LO(9);
+		r->l1 = XFS_MASK64HI(11) |
+			  ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+			  ((xfs_bmbt_rec_base_t)s->br_blockcount &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+	} else {
+		r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)s->br_startoff << 9);
+		r->l1 = ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+			  ((xfs_bmbt_rec_base_t)s->br_blockcount &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+}
+
+/*
+ * Set all the fields in a bmap extent record from the arguments.
+ */
+void
+xfs_bmbt_set_allf(
+	xfs_bmbt_rec_t	*r,	/* record to fill; written directly, no INT_SET */
+	xfs_fileoff_t	o,	/* starting file offset */
+	xfs_fsblock_t	b,	/* starting block */
+	xfs_filblks_t	c,	/* block count */
+	xfs_exntst_t	v)	/* extent state: XFS_EXT_NORM or XFS_EXT_UNWRITTEN */
+{
+	int	extent_flag;	/* 0 for XFS_EXT_NORM, 1 for XFS_EXT_UNWRITTEN */
+
+	ASSERT((v == XFS_EXT_NORM) || (v == XFS_EXT_UNWRITTEN));
+	extent_flag = (v == XFS_EXT_NORM) ? 0 : 1;
+	ASSERT((o & XFS_MASK64HI(64-BMBT_STARTOFF_BITLEN)) == 0);
+	ASSERT((c & XFS_MASK64HI(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
+#if XFS_BIG_FILESYSTEMS
+	ASSERT((b & XFS_MASK64HI(64-BMBT_STARTBLOCK_BITLEN)) == 0);
+#endif	/* XFS_BIG_FILESYSTEMS */
+#if XFS_BIG_FILESYSTEMS
+	r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |	/* flag in bit 63 */
+		((xfs_bmbt_rec_base_t)o << 9) |
+		((xfs_bmbt_rec_base_t)b >> 43);	/* part of b above bit 43 */
+	r->l1 = ((xfs_bmbt_rec_base_t)b << 21) |
+		((xfs_bmbt_rec_base_t)c &
+		(xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(b)) {	/* this case also ORs fixed masks into l0 and l1 */
+		r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)o << 9) |
+			 (xfs_bmbt_rec_base_t)XFS_MASK64LO(9);
+		r->l1 = XFS_MASK64HI(11) |
+			  ((xfs_bmbt_rec_base_t)b << 21) |
+			  ((xfs_bmbt_rec_base_t)c &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+	} else {
+		r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)o << 9);
+		r->l1 = ((xfs_bmbt_rec_base_t)b << 21) |
+			 ((xfs_bmbt_rec_base_t)c &
+			 (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+}
+
+#if ARCH_CONVERT != ARCH_NOCONVERT
+/*
+ * Set all fields of a disk format bmap extent record from uncompressed form.
+ */
+void
+xfs_bmbt_disk_set_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t *s)
+{
+	int	extent_flag;
+
+	ASSERT((s->br_state == XFS_EXT_NORM) ||
+		(s->br_state == XFS_EXT_UNWRITTEN));
+	extent_flag = (s->br_state == XFS_EXT_NORM) ? 0 : 1;
+	ASSERT((s->br_startoff & XFS_MASK64HI(9)) == 0);
+	ASSERT((s->br_blockcount & XFS_MASK64HI(43)) == 0);
+#if XFS_BIG_FILESYSTEMS
+	ASSERT((s->br_startblock & XFS_MASK64HI(12)) == 0);
+#endif	/* XFS_BIG_FILESYSTEMS */
+#if XFS_BIG_FILESYSTEMS
 	INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
 		  ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
 		  ((xfs_bmbt_rec_base_t)s->br_startblock >> 43));
@@ -2414,10 +2587,10 @@ xfs_bmbt_set_all(
 }
 
 /*
- * Set all the fields in a bmap extent record from the arguments.
+ * Set all the fields in a disk format bmap extent record from the arguments.
  */
 void
-xfs_bmbt_set_allf(
+xfs_bmbt_disk_set_allf(
 	xfs_bmbt_rec_t	*r,
 	xfs_fileoff_t	o,
 	xfs_fsblock_t	b,
@@ -2458,6 +2631,7 @@ xfs_bmbt_set_allf(
 	}
 #endif	/* XFS_BIG_FILESYSTEMS */
 }
+#endif
 
 /*
  * Set the blockcount field in a bmap extent record.
@@ -2468,8 +2642,8 @@ xfs_bmbt_set_blockcount(
 	xfs_filblks_t	v)
 {
 	ASSERT((v & XFS_MASK64HI(43)) == 0);
-	INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64HI(43)) |
-		  (xfs_bmbt_rec_base_t)(v & XFS_MASK64LO(21)));
+	r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64HI(43)) |
+		  (xfs_bmbt_rec_base_t)(v & XFS_MASK64LO(21));
 }
 
 /*
@@ -2484,20 +2658,20 @@ xfs_bmbt_set_startblock(
 	ASSERT((v & XFS_MASK64HI(12)) == 0);
 #endif	/* XFS_BIG_FILESYSTEMS */
 #if XFS_BIG_FILESYSTEMS
-	INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64HI(55)) |
-		  (xfs_bmbt_rec_base_t)(v >> 43));
-	INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)) |
-		  (xfs_bmbt_rec_base_t)(v << 21));
+	r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)XFS_MASK64HI(55)) |
+		  (xfs_bmbt_rec_base_t)(v >> 43);
+	r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)) |
+		  (xfs_bmbt_rec_base_t)(v << 21);
 #else	/* !XFS_BIG_FILESYSTEMS */
 	if (ISNULLSTARTBLOCK(v)) {
-		INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) | (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
-		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK64HI(11) |
+		r->l0 |= (xfs_bmbt_rec_base_t)XFS_MASK64LO(9);
+		r->l1 = (xfs_bmbt_rec_base_t)XFS_MASK64HI(11) |
 			  ((xfs_bmbt_rec_base_t)v << 21) |
-			  (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+			  (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
 	} else {
-		INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & ~(xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
-		INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)v << 21) |
-			  (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+		r->l0 &= ~(xfs_bmbt_rec_base_t)XFS_MASK64LO(9);
+		r->l1 = ((xfs_bmbt_rec_base_t)v << 21) |
+			  (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21));
 	}
 #endif	/* XFS_BIG_FILESYSTEMS */
 }
@@ -2511,9 +2685,9 @@ xfs_bmbt_set_startoff(
 	xfs_fileoff_t	v)
 {
 	ASSERT((v & XFS_MASK64HI(9)) == 0);
-	INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t) XFS_MASK64HI(1)) |
+	r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) XFS_MASK64HI(1)) |
 		((xfs_bmbt_rec_base_t)v << 9) |
-		  (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
+		  (r->l0 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(9));
 }
 
 /*
@@ -2526,9 +2700,9 @@ xfs_bmbt_set_state(
 {
 	ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
 	if (v == XFS_EXT_NORM)
-		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN));
+		r->l0 &= XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN);
 	else
-		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) | XFS_MASK64HI(BMBT_EXNTFLAG_BITLEN));
+		r->l0 |= XFS_MASK64HI(BMBT_EXNTFLAG_BITLEN);
 }
 
 /*
@@ -2559,8 +2733,8 @@ xfs_bmbt_to_bmdr(
 	fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
 	tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
 	dmxr = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);
-	bcopy(fkp, tkp, sizeof(*fkp) * dmxr);
-	bcopy(fpp, tpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
+	memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
+	memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
 }
 
 /*
@@ -2596,7 +2770,7 @@ xfs_bmbt_update(
 #endif
 	ptr = cur->bc_ptrs[0];
 	rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
-	xfs_bmbt_set_allf(rp, off, bno, len, state);
+	xfs_bmbt_disk_set_allf(rp, off, bno, len, state);
 	xfs_bmbt_log_recs(cur, bp, ptr, ptr);
 	if (ptr > 1) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
@@ -2618,13 +2792,14 @@ xfs_bmbt_update(
  * Return an error condition (1) if any flags found,
  * otherwise return 0.
  */
+
 int
 xfs_check_nostate_extents(
 	xfs_bmbt_rec_t		*ep,
 	xfs_extnum_t		num)
 {
 	for (; num > 0; num--, ep++) {
-		if (((INT_GET(ep->l0, ARCH_CONVERT)) >>
+		if ((ep->l0 >>
 		     (64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
 			ASSERT(0);
 			return 1;
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index a9ec9c58252d..8aeefd43c967 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -509,6 +509,41 @@ xfs_exntst_t
 xfs_bmbt_get_state(
 	xfs_bmbt_rec_t	*r);
 
+#if ARCH_CONVERT != ARCH_NOCONVERT	/* disk accessors are separate functions */
+void
+xfs_bmbt_disk_get_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t *s);
+
+xfs_exntst_t
+xfs_bmbt_disk_get_state(
+	xfs_bmbt_rec_t	*r);
+
+xfs_filblks_t
+xfs_bmbt_disk_get_blockcount(
+	xfs_bmbt_rec_t	*r);
+
+xfs_fsblock_t
+xfs_bmbt_disk_get_startblock(
+	xfs_bmbt_rec_t	*r);
+
+xfs_fileoff_t
+xfs_bmbt_disk_get_startoff(
+	xfs_bmbt_rec_t	*r);
+
+#else	/* no conversion: each disk accessor aliases its in memory twin */
+#define xfs_bmbt_disk_get_all(r, s) \
+	xfs_bmbt_get_all(r, s)
+#define xfs_bmbt_disk_get_state(r) \
+	xfs_bmbt_get_state(r)
+#define xfs_bmbt_disk_get_blockcount(r) \
+	xfs_bmbt_get_blockcount(r)
+#define xfs_bmbt_disk_get_startblock(r) \
+	xfs_bmbt_get_startblock(r)
+#define xfs_bmbt_disk_get_startoff(r) \
+	xfs_bmbt_get_startoff(r)
+#endif	/* ARCH_CONVERT != ARCH_NOCONVERT */
+
+
 int
 xfs_bmbt_increment(
 	struct xfs_btree_cur *,
@@ -607,6 +642,26 @@ xfs_bmbt_set_state(
 	xfs_bmbt_rec_t	*r,
 	xfs_exntst_t	v);
 
+#if ARCH_CONVERT != ARCH_NOCONVERT	/* disk setters are separate functions */
+void
+xfs_bmbt_disk_set_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t *s);
+
+void
+xfs_bmbt_disk_set_allf(
+	xfs_bmbt_rec_t	*r,
+	xfs_fileoff_t	o,
+	xfs_fsblock_t	b,
+	xfs_filblks_t	c,
+	xfs_exntst_t	v);
+#else	/* no conversion: each disk setter aliases its in memory twin */
+#define xfs_bmbt_disk_set_all(r, s) \
+	xfs_bmbt_set_all(r, s)
+#define xfs_bmbt_disk_set_allf(r, o, b, c, v) \
+	xfs_bmbt_set_allf(r, o, b, c, v)
+#endif	/* ARCH_CONVERT != ARCH_NOCONVERT */
+
 void
 xfs_bmbt_to_bmdr(
 	xfs_bmbt_block_t *,
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 7dcef68fb253..115b05df35ba 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -261,9 +261,9 @@ xfs_btree_check_rec(
 
 		r1 = ar1;
 		r2 = ar2;
-		ASSERT(xfs_bmbt_get_startoff(r1) +
-		       xfs_bmbt_get_blockcount(r1) <=
-		       xfs_bmbt_get_startoff(r2));
+		ASSERT(xfs_bmbt_disk_get_startoff(r1) +
+		       xfs_bmbt_disk_get_blockcount(r1) <=
+		       xfs_bmbt_disk_get_startoff(r2));
 		break;
 	    }
 	case XFS_BTNUM_INO: {
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index ac62646cacde..8a837fab5ad0 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -766,7 +766,7 @@ xfs_buf_item_init(
 	 * to have logged.
 	 */
 	bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP);
-	bcopy(XFS_BUF_PTR(bp), bip->bli_orig, XFS_BUF_COUNT(bp));
+	memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp));
 	bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP);
 #endif
 
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 14195151eeea..fd7d80ebe2ef 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -112,15 +112,14 @@ struct xfs_mount_args {
 						/* only) */
 #define XFSMNT_NOTSERVER	0x00100000	/* give up being the server */
 						/* (remount only) */
-#define XFSMNT_DMAPI		0x00200000	/* enable dmapi/xdsm */
+#define XFSMNT_32BITINODES	0x00200000	/* restrict inodes to 32
+						 * bits of address space */
 #define XFSMNT_GQUOTA		0x00400000	/* group quota accounting */
 #define XFSMNT_GQUOTAENF	0x00800000	/* group quota limit
 						 * enforcement */
 #define XFSMNT_NOUUID		0x01000000	/* Ignore fs uuid */
-#define XFSMNT_32BITINODES	0x02000000	/* restrict inodes to 32
-						 * bits of address space */
-#define XFSMNT_IRIXSGID		0x04000000	/* Irix-style sgid inheritance */
-#define XFSMNT_NOLOGFLUSH	0x08000000	/* Don't flush for log blocks */
+#define XFSMNT_DMAPI		0x02000000	/* enable dmapi/xdsm */
+#define XFSMNT_NOLOGFLUSH	0x04000000	/* Don't flush for log blocks */
 
 /* Did we get any args for CXFS to consume? */
 #define XFSARGS_FOR_CXFSARR(ap)		\
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 020801f897ae..06d8371730bc 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -360,7 +360,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
 			     (char *)leaf);
 	}
-	bcopy(oldroot, node, size);
+	memcpy(node, oldroot, size);
 	xfs_da_log_buf(tp, bp, 0, size - 1);
 	xfs_da_buf_done(blk1->bp);
 	blk1->bp = bp;
@@ -527,7 +527,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 			tmp *= (uint)sizeof(xfs_da_node_entry_t);
 			btree_s = &node2->btree[0];
 			btree_d = &node2->btree[count];
-			ovbcopy(btree_s, btree_d, tmp);
+			memmove(btree_d, btree_s, tmp);
 		}
 
 		/*
@@ -538,7 +538,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
 		btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
 		btree_d = &node2->btree[0];
-		bcopy(btree_s, btree_d, tmp);
+		memcpy(btree_d, btree_s, tmp);
 		INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));
 
 	} else {
@@ -550,7 +550,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
 		btree_s = &node2->btree[0];
 		btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
-		bcopy(btree_s, btree_d, tmp);
+		memcpy(btree_d, btree_s, tmp);
 		INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
 		xfs_da_log_buf(tp, blk1->bp,
 			XFS_DA_LOGRANGE(node1, btree_d, tmp));
@@ -562,7 +562,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
 		btree_s = &node2->btree[count];
 		btree_d = &node2->btree[0];
-		ovbcopy(btree_s, btree_d, tmp);
+		memmove(btree_d, btree_s, tmp);
 		INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
 	}
 
@@ -622,7 +622,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	btree = &node->btree[ oldblk->index ];
 	if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) {
 		tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
-		ovbcopy(btree, btree + 1, tmp);
+		memmove(btree + 1, btree, tmp);
 	}
 	INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval);
 	INT_SET(btree->before, ARCH_CONVERT, newblk->blkno);
@@ -790,7 +790,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	}
 	ASSERT(INT_ISZERO(blkinfo->forw, ARCH_CONVERT));
 	ASSERT(INT_ISZERO(blkinfo->back, ARCH_CONVERT));
-	bcopy(bp->data, root_blk->bp->data, state->blocksize);
+	memcpy(root_blk->bp->data, bp->data, state->blocksize);
 	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
 	error = xfs_da_shrink_inode(args, child, bp);
 	return(error);
@@ -842,7 +842,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 		 * path point to the block we want to drop (this one).
 		 */
 		forward = (!INT_ISZERO(info->forw, ARCH_CONVERT));
-		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
 		if (error)
@@ -898,7 +898,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 	 * Make altpath point to the block we want to keep (the lower
 	 * numbered block) and path point to the block we want to drop.
 	 */
-	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	memcpy(&state->altpath, &state->path, sizeof(state->path));
 	if (blkno < blk->blkno) {
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
@@ -1001,12 +1001,12 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
 	if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
 		tmp  = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1;
 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
-		ovbcopy(btree + 1, btree, tmp);
+		memmove(btree, btree + 1, tmp);
 		xfs_da_log_buf(state->args->trans, drop_blk->bp,
 		    XFS_DA_LOGRANGE(node, btree, tmp));
 		btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ];
 	}
-	bzero((char *)btree, sizeof(xfs_da_node_entry_t));
+	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
 	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
 	INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
@@ -1049,7 +1049,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	{
 		btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
 		tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
-		ovbcopy(&save_node->btree[0], btree, tmp);
+		memmove(btree, &save_node->btree[0], tmp);
 		btree = &save_node->btree[0];
 		xfs_da_log_buf(tp, save_blk->bp,
 			XFS_DA_LOGRANGE(save_node, btree,
@@ -1067,7 +1067,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	 * Move all the B-tree elements from drop_blk to save_blk.
 	 */
 	tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
-	bcopy(&drop_node->btree[0], btree, tmp);
+	memcpy(btree, &drop_node->btree[0], tmp);
 	INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));
 
 	xfs_da_log_buf(tp, save_blk->bp,
@@ -1798,7 +1798,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	/*
 	 * Copy the last block into the dead buffer and log it.
 	 */
-	bcopy(last_buf->data, dead_buf->data, mp->m_dirblksize);
+	memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
 	xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
 	dead_info = dead_buf->data;
 	/*
@@ -2343,7 +2343,7 @@ xfs_da_state_free(xfs_da_state_t *state)
 	if (state->extravalid && state->extrablk.bp)
 		xfs_da_buf_done(state->extrablk.bp);
 #ifdef DEBUG
-	bzero((char *)state, sizeof(*state));
+	memset((char *)state, 0, sizeof(*state));
 #endif /* DEBUG */
 	kmem_zone_free(xfs_da_state_zone, state);
 }
@@ -2390,7 +2390,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
 		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
 		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
 			bp = bps[i];
-			bcopy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
+			memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
 				XFS_BUF_COUNT(bp));
 		}
 	}
@@ -2431,7 +2431,7 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf)
 		for (i = off = 0; i < dabuf->nbuf;
 				i++, off += XFS_BUF_COUNT(bp)) {
 			bp = dabuf->bps[i];
-			bcopy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
+			memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
 				XFS_BUF_COUNT(bp));
 		}
 	}
@@ -2462,7 +2462,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
 			dabuf->next->prev = dabuf->prev;
 		mutex_spinunlock(&xfs_dabuf_global_lock, s);
 	}
-	bzero(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
+	memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
 #endif
 	if (dabuf->nbuf == 1)
 		kmem_zone_free(xfs_dabuf_zone, dabuf);
@@ -2532,7 +2532,7 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 		bp = dabuf->bps[0];
 	} else {
 		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
-		bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist));
+		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
 	}
 	xfs_da_buf_done(dabuf);
 	for (i = 0; i < nbuf; i++)
@@ -2558,7 +2558,7 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 		bp = dabuf->bps[0];
 	} else {
 		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
-		bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist));
+		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
 	}
 	xfs_da_buf_done(dabuf);
 	for (i = 0; i < nbuf; i++)
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index ea2f5798cf70..ad3ecd88f1a6 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -210,7 +210,7 @@ xfs_dir_init(xfs_trans_t *trans, xfs_inode_t *dir, xfs_inode_t *parent_dir)
 	xfs_da_args_t args;
 	int error;
 
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.dp = dir;
 	args.trans = trans;
 
@@ -534,7 +534,7 @@ xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp)
 		return 0;
 	}
 	if (INT_GET(dp->di_core.di_size, ARCH_CONVERT) < sizeof(sf->hdr)) {
-		xfs_fs_cmn_err(CE_WARN, mp, "Invalid shortform size: dp 0x%p\n",
+		xfs_fs_cmn_err(CE_WARN, mp, "Invalid shortform size: dp 0x%p",
 			dp);
 		return 1;
 	}
@@ -546,7 +546,7 @@ xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp)
 	count = sf->hdr.count;
 	if ((count < 0) || ((count * 10) > XFS_LITINO(mp))) {
 		xfs_fs_cmn_err(CE_WARN, mp,
-			"Invalid shortform count: dp 0x%p\n", dp);
+			"Invalid shortform count: dp 0x%p", dp);
 		return(1);
 	}
 
@@ -561,7 +561,7 @@ xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp)
 		xfs_dir_ino_validate(mp, ino);
 		if (sfe->namelen >= XFS_LITINO(mp)) {
 			xfs_fs_cmn_err(CE_WARN, mp,
-				"Invalid shortform namelen: dp 0x%p\n", dp);
+				"Invalid shortform namelen: dp 0x%p", dp);
 			return 1;
 		}
 		namelen_sum += sfe->namelen;
@@ -569,7 +569,7 @@ xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp)
 	}
 	if (namelen_sum >= XFS_LITINO(mp)) {
 		xfs_fs_cmn_err(CE_WARN, mp,
-			"Invalid shortform namelen: dp 0x%p\n", dp);
+			"Invalid shortform namelen: dp 0x%p", dp);
 		return 1;
 	}
 
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 1ee94f626cc1..403117dd28ff 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -147,7 +147,7 @@ xfs_dir2_init(
 	xfs_da_args_t	args;		/* operation arguments */
 	int		error;		/* error return value */
 
-	bzero((char *)&args, sizeof(args));
+	memset((char *)&args, 0, sizeof(args));
 	args.dp = dp;
 	args.trans = tp;
 	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
@@ -711,7 +711,7 @@ xfs_dir2_put_dirent64_direct(
 	idbp->d_off = pa->cook;
 	idbp->d_name[namelen] = '\0';
 	pa->done = 1;
-	bcopy(pa->name, idbp->d_name, namelen);
+	memcpy(idbp->d_name, pa->name, namelen);
 	return 0;
 }
 
@@ -743,7 +743,7 @@ xfs_dir2_put_dirent64_uio(
 	idbp->d_ino = pa->ino;
 	idbp->d_off = pa->cook;
 	idbp->d_name[namelen] = '\0';
-	bcopy(pa->name, idbp->d_name, namelen);
+	memcpy(idbp->d_name, pa->name, namelen);
 	rval = uiomove((caddr_t)idbp, reclen, UIO_READ, uio);
 	pa->done = (rval == 0);
 	return rval;
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index de56814d5d25..3756923e8740 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -331,7 +331,7 @@ xfs_dir2_block_addname(
 		blp--;
 		mid++;
 		if (mid)
-			ovbcopy(&blp[1], blp, mid * sizeof(*blp));
+			memmove(blp, &blp[1], mid * sizeof(*blp));
 		lfloglow = 0;
 		lfloghigh = mid;
 	}
@@ -357,7 +357,7 @@ xfs_dir2_block_addname(
 		    (highstale == INT_GET(btp->count, ARCH_CONVERT) ||
 		     mid - lowstale <= highstale - mid)) {
 			if (mid - lowstale)
-				ovbcopy(&blp[lowstale + 1], &blp[lowstale],
+				memmove(&blp[lowstale], &blp[lowstale + 1],
 					(mid - lowstale) * sizeof(*blp));
 			lfloglow = MIN(lowstale, lfloglow);
 			lfloghigh = MAX(mid, lfloghigh);
@@ -369,7 +369,7 @@ xfs_dir2_block_addname(
 			ASSERT(highstale < INT_GET(btp->count, ARCH_CONVERT));
 			mid++;
 			if (highstale - mid)
-				ovbcopy(&blp[mid], &blp[mid + 1],
+				memmove(&blp[mid + 1], &blp[mid],
 					(highstale - mid) * sizeof(*blp));
 			lfloglow = MIN(mid, lfloglow);
 			lfloghigh = MAX(highstale, lfloghigh);
@@ -397,7 +397,7 @@ xfs_dir2_block_addname(
 	 */
 	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
 	dep->namelen = args->namelen;
-	bcopy(args->name, dep->name, args->namelen);
+	memcpy(dep->name, args->name, args->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
 	/*
@@ -717,7 +717,7 @@ xfs_dir2_block_lookup_int(
 		 */
 		if (dep->namelen == args->namelen &&
 		    dep->name[0] == args->name[0] &&
-		    bcmp(dep->name, args->name, args->namelen) == 0) {
+		    memcmp(dep->name, args->name, args->namelen) == 0) {
 			*bpp = bp;
 			*entno = mid;
 			return 0;
@@ -1075,7 +1075,7 @@ xfs_dir2_sf_to_block(
 	buf_len = dp->i_df.if_bytes;
 	buf = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);
 
-	bcopy(sfp, buf, dp->i_df.if_bytes);
+	memcpy(buf, sfp, dp->i_df.if_bytes);
 	xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
 	dp->i_d.di_size = 0;
 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
@@ -1199,7 +1199,7 @@ xfs_dir2_sf_to_block(
 		INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
 				XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT));
 		dep->namelen = sfep->namelen;
-		bcopy(sfep->name, dep->name, dep->namelen);
+		memcpy(dep->name, sfep->name, dep->namelen);
 		tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 		INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
 		xfs_dir2_data_log_entry(tp, bp, dep);
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 7ea956729cf1..7481245193d4 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -340,7 +340,7 @@ xfs_dir2_data_freescan(
 	/*
 	 * Start by clearing the table.
 	 */
-	bzero(d->hdr.bestfree, sizeof(d->hdr.bestfree));
+	memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree));
 	*loghead = 1;
 	/*
 	 * Set up pointers.
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index c201111f7339..7ab474f6a3a3 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -110,7 +110,7 @@ xfs_dir2_block_to_leaf(
 	 * Could compact these but I think we always do the conversion
 	 * after squeezing out stale entries.
 	 */
-	bcopy(blp, leaf->ents, INT_GET(btp->count, ARCH_CONVERT) * sizeof(xfs_dir2_leaf_entry_t));
+	memcpy(leaf->ents, blp, INT_GET(btp->count, ARCH_CONVERT) * sizeof(xfs_dir2_leaf_entry_t));
 	xfs_dir2_leaf_log_ents(tp, lbp, 0, INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1);
 	needscan = 0;
 	needlog = 1;
@@ -353,7 +353,7 @@ xfs_dir2_leaf_addname(
 		 */
 		if (use_block >= INT_GET(ltp->bestcount, ARCH_CONVERT)) {
 			bestsp--;
-			ovbcopy(&bestsp[1], &bestsp[0],
+			memmove(&bestsp[0], &bestsp[1],
 				INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(bestsp[0]));
 			INT_MOD(ltp->bestcount, ARCH_CONVERT, +1);
 			xfs_dir2_leaf_log_tail(tp, lbp);
@@ -402,7 +402,7 @@ xfs_dir2_leaf_addname(
 	dep = (xfs_dir2_data_entry_t *)dup;
 	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
 	dep->namelen = args->namelen;
-	bcopy(args->name, dep->name, dep->namelen);
+	memcpy(dep->name, args->name, dep->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
 	/*
@@ -434,7 +434,7 @@ xfs_dir2_leaf_addname(
 		 * lep is still good as the index leaf entry.
 		 */
 		if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
-			ovbcopy(lep, lep + 1,
+			memmove(lep + 1, lep,
 				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
 		/*
 		 * Record low and high logging indices for the leaf.
@@ -493,8 +493,8 @@ xfs_dir2_leaf_addname(
 			 * and make room for the new entry.
 			 */
 			if (index - lowstale - 1 > 0)
-				ovbcopy(&leaf->ents[lowstale + 1],
-					&leaf->ents[lowstale],
+				memmove(&leaf->ents[lowstale],
+					&leaf->ents[lowstale + 1],
 					(index - lowstale - 1) * sizeof(*lep));
 			lep = &leaf->ents[index - 1];
 			lfloglow = MIN(lowstale, lfloglow);
@@ -512,8 +512,8 @@ xfs_dir2_leaf_addname(
 			 * and make room for the new entry.
 			 */
 			if (highstale - index > 0)
-				ovbcopy(&leaf->ents[index],
-					&leaf->ents[index + 1],
+				memmove(&leaf->ents[index + 1],
+					&leaf->ents[index],
 					(highstale - index) * sizeof(*lep));
 			lep = &leaf->ents[index];
 			lfloglow = MIN(index, lfloglow);
@@ -847,7 +847,7 @@ xfs_dir2_leaf_getdents(
 					 * the table.
 					 */
 					if (!map->br_blockcount && --map_valid)
-						ovbcopy(&map[1], &map[0],
+						memmove(&map[0], &map[1],
 							sizeof(map[0]) *
 							map_valid);
 					i -= j;
@@ -909,8 +909,8 @@ xfs_dir2_leaf_getdents(
 						nmap--;
 						length = map_valid + nmap - i;
 						if (length)
-							ovbcopy(&map[i + 1],
-								&map[i],
+							memmove(&map[i],
+								&map[i + 1],
 								sizeof(map[i]) *
 								length);
 					} else {
@@ -1409,7 +1409,7 @@ xfs_dir2_leaf_lookup_int(
 		 */
 		if (dep->namelen == args->namelen &&
 		    dep->name[0] == args->name[0] &&
-		    bcmp(dep->name, args->name, args->namelen) == 0) {
+		    memcmp(dep->name, args->name, args->namelen) == 0) {
 			*dbpp = dbp;
 			*indexp = index;
 			return 0;
@@ -1544,7 +1544,7 @@ xfs_dir2_leaf_removename(
 			 * Copy the table down so inactive entries at the
 			 * end are removed.
 			 */
-			ovbcopy(bestsp, &bestsp[db - i],
+			memmove(&bestsp[db - i], bestsp,
 				(INT_GET(ltp->bestcount, ARCH_CONVERT) - (db - i)) * sizeof(*bestsp));
 			INT_MOD(ltp->bestcount, ARCH_CONVERT, -(db - i));
 			xfs_dir2_leaf_log_tail(tp, lbp);
@@ -1728,7 +1728,7 @@ xfs_dir2_leaf_trim_data(
 	 */
 	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
 	INT_MOD(ltp->bestcount, ARCH_CONVERT, -1);
-	ovbcopy(&bestsp[0], &bestsp[1], INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(*bestsp));
+	memmove(&bestsp[1], &bestsp[0], INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(*bestsp));
 	xfs_dir2_leaf_log_tail(tp, lbp);
 	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
 	return 0;
@@ -1842,7 +1842,7 @@ xfs_dir2_node_to_leaf(
 	/*
 	 * Set up the leaf bests table.
 	 */
-	bcopy(free->bests, XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT),
+	memcpy(XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT), free->bests,
 		INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(leaf->bests[0]));
 	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
 	xfs_dir2_leaf_log_tail(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 99661539e595..54234b40ed6b 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -239,7 +239,7 @@ xfs_dir2_leafn_add(
 	if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) {
 		lep = &leaf->ents[index];
 		if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
-			ovbcopy(lep, lep + 1,
+			memmove(lep + 1, lep,
 				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
 		lfloglow = index;
 		lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT);
@@ -288,8 +288,8 @@ xfs_dir2_leafn_add(
 			       XFS_DIR2_NULL_DATAPTR);
 			ASSERT(index - lowstale - 1 >= 0);
 			if (index - lowstale - 1 > 0)
-				ovbcopy(&leaf->ents[lowstale + 1],
-					&leaf->ents[lowstale],
+				memmove(&leaf->ents[lowstale],
+					&leaf->ents[lowstale + 1],
 					(index - lowstale - 1) * sizeof(*lep));
 			lep = &leaf->ents[index - 1];
 			lfloglow = MIN(lowstale, lfloglow);
@@ -304,8 +304,8 @@ xfs_dir2_leafn_add(
 			       XFS_DIR2_NULL_DATAPTR);
 			ASSERT(highstale - index >= 0);
 			if (highstale - index > 0)
-				ovbcopy(&leaf->ents[index],
-					&leaf->ents[index + 1],
+				memmove(&leaf->ents[index + 1],
+					&leaf->ents[index],
 					(highstale - index) * sizeof(*lep));
 			lep = &leaf->ents[index];
 			lfloglow = MIN(index, lfloglow);
@@ -564,7 +564,7 @@ xfs_dir2_leafn_lookup_int(
 			 */
 			if (dep->namelen == args->namelen &&
 			    dep->name[0] == args->name[0] &&
-			    bcmp(dep->name, args->name, args->namelen) == 0) {
+			    memcmp(dep->name, args->name, args->namelen) == 0) {
 				args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
 				*indexp = index;
 				state->extravalid = 1;
@@ -644,7 +644,7 @@ xfs_dir2_leafn_moveents(
 	 * to hold the new entries.
 	 */
 	if (start_d < INT_GET(leaf_d->hdr.count, ARCH_CONVERT)) {
-		ovbcopy(&leaf_d->ents[start_d], &leaf_d->ents[start_d + count],
+		memmove(&leaf_d->ents[start_d + count], &leaf_d->ents[start_d],
 			(INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - start_d) *
 			sizeof(xfs_dir2_leaf_entry_t));
 		xfs_dir2_leaf_log_ents(tp, bp_d, start_d + count,
@@ -666,7 +666,7 @@ xfs_dir2_leafn_moveents(
 	/*
 	 * Copy the leaf entries from source to destination.
 	 */
-	bcopy(&leaf_s->ents[start_s], &leaf_d->ents[start_d],
+	memcpy(&leaf_d->ents[start_d], &leaf_s->ents[start_s],
 		count * sizeof(xfs_dir2_leaf_entry_t));
 	xfs_dir2_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1);
 	/*
@@ -674,7 +674,7 @@ xfs_dir2_leafn_moveents(
 	 * delete the ones we copied by sliding the next ones down.
 	 */
 	if (start_s + count < INT_GET(leaf_s->hdr.count, ARCH_CONVERT)) {
-		ovbcopy(&leaf_s->ents[start_s + count], &leaf_s->ents[start_s],
+		memmove(&leaf_s->ents[start_s], &leaf_s->ents[start_s + count],
 			count * sizeof(xfs_dir2_leaf_entry_t));
 		xfs_dir2_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1);
 	}
@@ -1135,7 +1135,7 @@ xfs_dir2_leafn_toosmall(
 		 * path point to the block we want to drop (this one).
 		 */
 		forward = !INT_ISZERO(info->forw, ARCH_CONVERT);
-		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward, 0,
 			&rval);
 		if (error)
@@ -1197,7 +1197,7 @@ xfs_dir2_leafn_toosmall(
 	 * Make altpath point to the block we want to keep (the lower
 	 * numbered block) and path point to the block we want to drop.
 	 */
-	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	memcpy(&state->altpath, &state->path, sizeof(state->path));
 	if (blkno < blk->blkno)
 		error = xfs_da_path_shift(state, &state->altpath, forward, 0,
 			&rval);
@@ -1685,7 +1685,7 @@ xfs_dir2_node_addname_int(
 	dep = (xfs_dir2_data_entry_t *)dup;
 	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
 	dep->namelen = args->namelen;
-	bcopy(args->name, dep->name, dep->namelen);
+	memcpy(dep->name, args->name, dep->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
 	xfs_dir2_data_log_entry(tp, dbp, dep);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index c1d2d3d9b2c2..aaba9972bd57 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -176,7 +176,7 @@ xfs_dir2_block_to_sf(
 	 * and add local data.
 	 */
 	block = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
-	bcopy(bp->data, block, mp->m_dirblksize);
+	memcpy(block, bp->data, mp->m_dirblksize);
 	logflags = XFS_ILOG_CORE;
 	if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
 		ASSERT(error != ENOSPC);
@@ -198,7 +198,7 @@ xfs_dir2_block_to_sf(
 	 * Copy the header into the newly allocate local space.
 	 */
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	bcopy(sfhp, sfp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count));
+	memcpy(sfp, sfhp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count));
 	dp->i_d.di_size = size;
 	/*
 	 * Set up to loop over the block's entries.
@@ -241,7 +241,7 @@ xfs_dir2_block_to_sf(
 			XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep,
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)block), ARCH_CONVERT);
-			bcopy(dep->name, sfep->name, dep->namelen);
+			memcpy(sfep->name, dep->name, dep->namelen);
 			temp=INT_GET(dep->inumber, ARCH_CONVERT);
 			XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &temp,
 				XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
@@ -405,7 +405,7 @@ xfs_dir2_sf_addname_easy(
 	 */
 	sfep->namelen = args->namelen;
 	XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT);
-	bcopy(args->name, sfep->name, sfep->namelen);
+	memcpy(sfep->name, args->name, sfep->namelen);
 	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber,
 		XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
 	/*
@@ -457,7 +457,7 @@ xfs_dir2_sf_addname_hard(
 	old_isize = (int)dp->i_d.di_size;
 	buf = kmem_alloc(old_isize, KM_SLEEP);
 	oldsfp = (xfs_dir2_sf_t *)buf;
-	bcopy(sfp, oldsfp, old_isize);
+	memcpy(oldsfp, sfp, old_isize);
 	/*
 	 * Loop over the old directory finding the place we're going
 	 * to insert the new entry.
@@ -490,14 +490,14 @@ xfs_dir2_sf_addname_hard(
 	 * Copy the first part of the directory, including the header.
 	 */
 	nbytes = (int)((char *)oldsfep - (char *)oldsfp);
-	bcopy(oldsfp, sfp, nbytes);
+	memcpy(sfp, oldsfp, nbytes);
 	sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes);
 	/*
 	 * Fill in the new entry, and update the header counts.
 	 */
 	sfep->namelen = args->namelen;
 	XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT);
-	bcopy(args->name, sfep->name, sfep->namelen);
+	memcpy(sfep->name, args->name, sfep->namelen);
 	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber,
 		XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
 	sfp->hdr.count++;
@@ -510,7 +510,7 @@ xfs_dir2_sf_addname_hard(
 	 */
 	if (!eof) {
 		sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
-		bcopy(oldsfep, sfep, old_isize - nbytes);
+		memcpy(sfep, oldsfep, old_isize - nbytes);
 	}
 	kmem_free(buf, old_isize);
 	dp->i_d.di_size = new_isize;
@@ -916,7 +916,7 @@ xfs_dir2_sf_lookup(
 	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
 		if (sfep->namelen == args->namelen &&
 		    sfep->name[0] == args->name[0] &&
-		    bcmp(args->name, sfep->name, args->namelen) == 0) {
+		    memcmp(args->name, sfep->name, args->namelen) == 0) {
 			args->inumber =
 				XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
 					XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
@@ -971,7 +971,7 @@ xfs_dir2_sf_removename(
 	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
 		if (sfep->namelen == args->namelen &&
 		    sfep->name[0] == args->name[0] &&
-		    bcmp(sfep->name, args->name, args->namelen) == 0) {
+		    memcmp(sfep->name, args->name, args->namelen) == 0) {
 			ASSERT(XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
 					XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT) ==
 				args->inumber);
@@ -994,7 +994,7 @@ xfs_dir2_sf_removename(
 	 * Copy the part if any after the removed entry, sliding it down.
 	 */
 	if (byteoff + entsize < oldsize)
-		ovbcopy((char *)sfp + byteoff + entsize, (char *)sfp + byteoff,
+		memmove((char *)sfp + byteoff, (char *)sfp + byteoff + entsize,
 			oldsize - (byteoff + entsize));
 	/*
 	 * Fix up the header and file size.
@@ -1108,7 +1108,7 @@ xfs_dir2_sf_replace(
 		     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
 			if (sfep->namelen == args->namelen &&
 			    sfep->name[0] == args->name[0] &&
-			    bcmp(args->name, sfep->name, args->namelen) == 0) {
+			    memcmp(args->name, sfep->name, args->namelen) == 0) {
 #if XFS_BIG_FILESYSTEMS || defined(DEBUG)
 				ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
 					XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
@@ -1196,7 +1196,7 @@ xfs_dir2_sf_toino4(
 	buf = kmem_alloc(oldsize, KM_SLEEP);
 	oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	ASSERT(oldsfp->hdr.i8count == 1);
-	bcopy(oldsfp, buf, oldsize);
+	memcpy(buf, oldsfp, oldsize);
 	/*
 	 * Compute the new inode size.
 	 */
@@ -1228,7 +1228,7 @@ xfs_dir2_sf_toino4(
 		  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
-		bcopy(oldsfep->name, sfep->name, sfep->namelen);
+		memcpy(sfep->name, oldsfep->name, sfep->namelen);
 		ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp,
 			XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT);
 		XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
@@ -1273,7 +1273,7 @@ xfs_dir2_sf_toino8(
 	buf = kmem_alloc(oldsize, KM_SLEEP);
 	oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	ASSERT(oldsfp->hdr.i8count == 0);
-	bcopy(oldsfp, buf, oldsize);
+	memcpy(buf, oldsfp, oldsize);
 	/*
 	 * Compute the new inode size.
 	 */
@@ -1305,7 +1305,7 @@ xfs_dir2_sf_toino8(
 		  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
-		bcopy(oldsfep->name, sfep->name, sfep->namelen);
+		memcpy(sfep->name, oldsfep->name, sfep->namelen);
 		ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp,
 			XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT);
 		XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 576c19f17c68..7ee59c6107b3 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -93,7 +93,7 @@ typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t;
  * be calculated on the fly.
  *
  * Entries are packed toward the top as tightly as possible.  The header
- * and the elements must be bcopy()'d out into a work area to get correct
+ * and the elements must be memcpy'd out into a work area to get correct
  * alignment for the inode number fields.
  */
 typedef struct xfs_dir2_sf_hdr {
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index fe9280e1f427..3e517abee119 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -65,9 +65,9 @@ xfs_dir2_trace_enter(
 	ASSERT(xfs_dir2_trace_buf);
 	ASSERT(dp->i_dir_trace);
 	if (name)
-		bcopy(name, n, min(sizeof(n), namelen));
+		memcpy(n, name, min(sizeof(n), namelen));
 	else
-		bzero((char *)n, sizeof(n));
+		memset((char *)n, 0, sizeof(n));
 	ktrace_enter(xfs_dir2_trace_buf,
 		(void *)(__psunsigned_t)type, (void *)where,
 		(void *)a0, (void *)a1, (void *)a2, (void *)a3,
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index 1aceaf37693d..d689c169f6e7 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -105,7 +105,7 @@ xfs_dir_ino_validate(xfs_mount_t *mp, xfs_ino_t ino)
 		XFS_AGINO_TO_INO(mp, agno, agino) == ino;
 	if (XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
 			XFS_RANDOM_DIR_INO_VALIDATE)) {
-		xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx\n",
+		xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx",
 				(unsigned long long) ino);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
@@ -171,7 +171,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args)
 	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
 		if (sfe->namelen == args->namelen &&
 		    args->name[0] == sfe->name[0] &&
-		    bcmp(args->name, sfe->name, args->namelen) == 0)
+		    memcmp(args->name, sfe->name, args->namelen) == 0)
 			return(XFS_ERROR(EEXIST));
 		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
 	}
@@ -184,7 +184,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args)
 
 	XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sfe->inumber, ARCH_CONVERT);
 	sfe->namelen = args->namelen;
-	bcopy(args->name, sfe->name, sfe->namelen);
+	memcpy(sfe->name, args->name, sfe->namelen);
 	INT_MOD(sf->hdr.count, ARCH_CONVERT, +1);
 
 	dp->i_d.di_size += size;
@@ -223,7 +223,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args)
 		size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe);
 		if (sfe->namelen == args->namelen &&
 		    sfe->name[0] == args->name[0] &&
-		    bcmp(sfe->name, args->name, args->namelen) == 0)
+		    memcmp(sfe->name, args->name, args->namelen) == 0)
 			break;
 		base += size;
 		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
@@ -234,7 +234,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args)
 	}
 
 	if ((base + size) != dp->i_d.di_size) {
-		ovbcopy(&((char *)sf)[base+size], &((char *)sf)[base],
+		memmove(&((char *)sf)[base], &((char *)sf)[base+size],
 					      dp->i_d.di_size - (base+size));
 	}
 	INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
@@ -283,7 +283,7 @@ xfs_dir_shortform_lookup(xfs_da_args_t *args)
 	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
 		if (sfe->namelen == args->namelen &&
 		    sfe->name[0] == args->name[0] &&
-		    bcmp(args->name, sfe->name, args->namelen) == 0) {
+		    memcmp(args->name, sfe->name, args->namelen) == 0) {
 			XFS_DIR_SF_GET_DIRINO_ARCH(&sfe->inumber, &args->inumber, ARCH_CONVERT);
 			return(XFS_ERROR(EEXIST));
 		}
@@ -324,7 +324,7 @@ xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
 	tmpbuffer = kmem_alloc(size, KM_SLEEP);
 	ASSERT(tmpbuffer != NULL);
 
-	bcopy(dp->i_df.if_u1.if_data, tmpbuffer, size);
+	memcpy(tmpbuffer, dp->i_df.if_u1.if_data, size);
 
 	sf = (xfs_dir_shortform_t *)tmpbuffer;
 	XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, &inumber, ARCH_CONVERT);
@@ -611,8 +611,8 @@ xfs_dir_shortform_replace(xfs_da_args_t *args)
 	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
 		if (sfe->namelen == args->namelen &&
 		    sfe->name[0] == args->name[0] &&
-		    bcmp(args->name, sfe->name, args->namelen) == 0) {
-			ASSERT(bcmp((char *)&args->inumber,
+		    memcmp(args->name, sfe->name, args->namelen) == 0) {
+			ASSERT(memcmp((char *)&args->inumber,
 				(char *)&sfe->inumber, sizeof(xfs_ino_t)));
 			XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sfe->inumber, ARCH_CONVERT);
 			xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
@@ -650,10 +650,10 @@ xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
 	if (retval)
 		return(retval);
 	ASSERT(bp != NULL);
-	bcopy(bp->data, tmpbuffer, XFS_LBSIZE(dp->i_mount));
+	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
 	leaf = (xfs_dir_leafblock_t *)tmpbuffer;
 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
-	bzero(bp->data, XFS_LBSIZE(dp->i_mount));
+	memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
 
 	/*
 	 * Find and special case the parent inode number
@@ -736,7 +736,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
 		return(retval);
 	}
 	ASSERT(bp2 != NULL);
-	bcopy(bp1->data, bp2->data, XFS_LBSIZE(dp->i_mount));
+	memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
 	xfs_da_buf_done(bp1);
 	xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
 
@@ -787,7 +787,7 @@ xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 		return(retval);
 	ASSERT(bp != NULL);
 	leaf = bp->data;
-	bzero((char *)leaf, XFS_LBSIZE(dp->i_mount));
+	memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
 	hdr = &leaf->hdr;
 	INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_DIR_LEAF_MAGIC);
 	INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
@@ -960,7 +960,7 @@ xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
 	if (index < INT_GET(hdr->count, ARCH_CONVERT)) {
 		tmp  = INT_GET(hdr->count, ARCH_CONVERT) - index;
 		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
-		ovbcopy(entry, entry + 1, tmp);
+		memmove(entry + 1, entry, tmp);
 		xfs_da_log_buf(args->trans, bp,
 		    XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
 	}
@@ -986,7 +986,7 @@ xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
 	 */
 	namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
 	XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &namest->inumber, ARCH_CONVERT);
-	bcopy(args->name, namest->name, args->namelen);
+	memcpy(namest->name, args->name, args->namelen);
 	xfs_da_log_buf(args->trans, bp,
 	    XFS_DA_LOGRANGE(leaf, namest, XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)));
 
@@ -1029,7 +1029,7 @@ xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
 	lbsize = XFS_LBSIZE(mp);
 	tmpbuffer = kmem_alloc(lbsize, KM_SLEEP);
 	ASSERT(tmpbuffer != NULL);
-	bcopy(bp->data, tmpbuffer, lbsize);
+	memcpy(tmpbuffer, bp->data, lbsize);
 
 	/*
 	 * Make a second copy in case xfs_dir_leaf_moveents()
@@ -1037,9 +1037,9 @@ xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
 	 */
 	if (musthave || justcheck) {
 		tmpbuffer2 = kmem_alloc(lbsize, KM_SLEEP);
-		bcopy(bp->data, tmpbuffer2, lbsize);
+		memcpy(tmpbuffer2, bp->data, lbsize);
 	}
-	bzero(bp->data, lbsize);
+	memset(bp->data, 0, lbsize);
 
 	/*
 	 * Copy basic information
@@ -1072,7 +1072,7 @@ xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
 
 	if (justcheck || rval == ENOSPC) {
 		ASSERT(tmpbuffer2);
-		bcopy(tmpbuffer2, bp->data, lbsize);
+		memcpy(bp->data, tmpbuffer2, lbsize);
 	} else {
 		xfs_da_log_buf(trans, bp, 0, lbsize - 1);
 	}
@@ -1357,7 +1357,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
 		 * path point to the block we want to drop (this one).
 		 */
 		forward = !INT_ISZERO(info->forw, ARCH_CONVERT);
-		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
 		if (error)
@@ -1418,7 +1418,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
 	 * Make altpath point to the block we want to keep (the lower
 	 * numbered block) and path point to the block we want to drop.
 	 */
-	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	memcpy(&state->altpath, &state->path, sizeof(state->path));
 	if (blkno < blk->blkno) {
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
@@ -1538,17 +1538,17 @@ xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
 	 * Compress the remaining entries and zero out the removed stuff.
 	 */
 	namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
-	bzero((char *)namest, entsize);
+	memset((char *)namest, 0, entsize);
 	xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, namest, entsize));
 
 	INT_MOD(hdr->namebytes, ARCH_CONVERT, -(entry->namelen));
 	tmp = (INT_GET(hdr->count, ARCH_CONVERT) - index) * (uint)sizeof(xfs_dir_leaf_entry_t);
-	ovbcopy(entry + 1, entry, tmp);
+	memmove(entry, entry + 1, tmp);
 	INT_MOD(hdr->count, ARCH_CONVERT, -1);
 	xfs_da_log_buf(trans, bp,
 	    XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
 	entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)];
-	bzero((char *)entry, sizeof(xfs_dir_leaf_entry_t));
+	memset((char *)entry, 0, sizeof(xfs_dir_leaf_entry_t));
 
 	/*
 	 * If we removed the first entry, re-find the first used byte
@@ -1642,7 +1642,7 @@ xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 		 */
 		tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
 		ASSERT(tmpbuffer != NULL);
-		bzero(tmpbuffer, state->blocksize);
+		memset(tmpbuffer, 0, state->blocksize);
 		tmp_leaf = (xfs_dir_leafblock_t *)tmpbuffer;
 		tmp_hdr = &tmp_leaf->hdr;
 		tmp_hdr->info = save_hdr->info; /* struct copy */
@@ -1664,7 +1664,7 @@ xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 					      tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
 					      (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
 		}
-		bcopy(tmp_leaf, save_leaf, state->blocksize);
+		memcpy(save_leaf, tmp_leaf, state->blocksize);
 		kmem_free(tmpbuffer, state->blocksize);
 	}
 
@@ -1750,7 +1750,7 @@ xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
 		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
 		if (entry->namelen == args->namelen &&
 		    namest->name[0] == args->name[0] &&
-		    bcmp(args->name, namest->name, args->namelen) == 0) {
+		    memcmp(args->name, namest->name, args->namelen) == 0) {
 			XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &args->inumber, ARCH_CONVERT);
 			*index = probe;
 			return(XFS_ERROR(EEXIST));
@@ -1813,7 +1813,7 @@ xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
 		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
 		entry_s = &leaf_d->entries[start_d];
 		entry_d = &leaf_d->entries[start_d + count];
-		bcopy(entry_s, entry_d, tmp);
+		memmove(entry_d, entry_s, tmp);
 	}
 
 	/*
@@ -1831,11 +1831,11 @@ xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
 		INT_COPY(entry_d->nameidx, hdr_d->firstused, ARCH_CONVERT);
 		entry_d->namelen = entry_s->namelen;
 		ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
-		bcopy(XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
-		      XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)), tmp);
+		memcpy(XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)),
+		       XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)), tmp);
 		ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
-		bzero((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
-		      tmp);
+		memset((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
+		      0, tmp);
 		INT_MOD(hdr_s->namebytes, ARCH_CONVERT, -(entry_d->namelen));
 		INT_MOD(hdr_d->namebytes, ARCH_CONVERT, entry_d->namelen);
 		INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
@@ -1853,7 +1853,7 @@ xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
 		tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
 		entry_s = &leaf_s->entries[start_s];
 		ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
-		bzero((char *)entry_s, tmp);
+		memset((char *)entry_s, 0, tmp);
 	} else {
 		/*
 		 * Move the remaining entries down to fill the hole,
@@ -1863,12 +1863,12 @@ xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
 		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
 		entry_s = &leaf_s->entries[start_s + count];
 		entry_d = &leaf_s->entries[start_s];
-		bcopy(entry_s, entry_d, tmp);
+		memmove(entry_d, entry_s, tmp);
 
 		tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
 		entry_s = &leaf_s->entries[INT_GET(hdr_s->count, ARCH_CONVERT)];
 		ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
-		bzero((char *)entry_s, tmp);
+		memset((char *)entry_s, 0, tmp);
 	}
 
 	/*
@@ -2191,7 +2191,7 @@ xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa)
 	idbp->d_off = pa->cook.o;
 	idbp->d_name[namelen] = '\0';
 	pa->done = 1;
-	bcopy(pa->name, idbp->d_name, namelen);
+	memcpy(idbp->d_name, pa->name, namelen);
 	return 0;
 }
 
@@ -2217,7 +2217,7 @@ xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa)
 	idbp->d_ino = pa->ino;
 	idbp->d_off = pa->cook.o;
 	idbp->d_name[namelen] = '\0';
-	bcopy(pa->name, idbp->d_name, namelen);
+	memcpy(idbp->d_name, pa->name, namelen);
 	retval = uiomove((caddr_t)idbp, reclen, UIO_READ, uio);
 	pa->done = (retval == 0);
 	return retval;
diff --git a/fs/xfs/xfs_dir_sf.h b/fs/xfs/xfs_dir_sf.h
index ede171472223..10c60645f1fc 100644
--- a/fs/xfs/xfs_dir_sf.h
+++ b/fs/xfs/xfs_dir_sf.h
@@ -46,7 +46,7 @@ typedef struct { __uint8_t i[sizeof(xfs_ino_t)]; } xfs_dir_ino_t;
  * be calculated on the fly.
  *
  * Entries are packed toward the top as tight as possible.  The header
- * and the elements much be bcopy()'d out into a work area to get correct
+ * and the elements must be memcpy'd out into a work area to get correct
  * alignment for the inode number fields.
  */
 typedef struct xfs_dir_shortform {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index f67497d78935..fd1ae9b156b1 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -94,7 +94,7 @@ xfs_qm_dqinit(
 #endif
 	} else {
 		/*
-		 * Only the q_core portion was bzeroed in dqreclaim_one().
+		 * Only the q_core portion was zeroed in dqreclaim_one().
 		 * So, we need to reset others.
 		 */
 		 dqp->q_nrefs = 0;
@@ -156,7 +156,7 @@ xfs_qm_dqinit_core(
 	xfs_dqblk_t	 *d)
 {
 	/*
-	 * Caller has bzero'd the entire dquot 'chunk' already.
+	 * Caller has zero'd the entire dquot 'chunk' already.
 	 */
 	INT_SET(d->dd_diskdq.d_magic, ARCH_CONVERT, XFS_DQUOT_MAGIC);
 	INT_SET(d->dd_diskdq.d_version, ARCH_CONVERT, XFS_DQUOT_VERSION);
@@ -351,7 +351,7 @@ xfs_qm_init_dquot_blk(
 	 */
 	curid = id - (id % XFS_QM_DQPERBLK(mp));
 	ASSERT(curid >= 0);
-	bzero(d, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
+	memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
 	for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)
 		xfs_qm_dqinit_core(curid, type, d);
 	xfs_trans_dquot_buf(tp, bp,
@@ -614,7 +614,7 @@ xfs_qm_dqread(
 	}
 
 	/* copy everything from disk dquot to the incore dquot */
-	bcopy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
+	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
 	ASSERT(INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id);
 	xfs_qm_dquot_logitem_init(dqp);
 
@@ -1209,7 +1209,7 @@ xfs_qm_dqflush(
 	}
 
 	/* This is the only portion of data that needs to persist */
-	bcopy(&(dqp->q_core), ddqp, sizeof(xfs_disk_dquot_t));
+	memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));
 
 	/*
 	 * Clear the dirty field and remember the flush lsn for later use.
@@ -1475,7 +1475,7 @@ xfs_qm_dqpurge(
 	dqp->q_mount = NULL;;
 	dqp->q_hash = NULL;
 	dqp->dq_flags = XFS_DQ_INACTIVE;
-	bzero(&dqp->q_core, sizeof(dqp->q_core));
+	memset(&dqp->q_core, 0, sizeof(dqp->q_core));
 	xfs_dqfunlock(dqp);
 	xfs_dqunlock(dqp);
 	XFS_DQ_HASH_UNLOCK(thishash);
@@ -1585,7 +1585,7 @@ xfs_qm_dqcheck(
 	 */
 	ASSERT(id != -1);
 	ASSERT(flags & XFS_QMOPT_DQREPAIR);
-	bzero(ddq, sizeof(xfs_dqblk_t));
+	memset(ddq, 0, sizeof(xfs_dqblk_t));
 	xfs_qm_dqinit_core(id, type, (xfs_dqblk_t *)ddq);
 	return (errs);
 }
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index b5ceb0316aba..823aa2913e75 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -69,9 +69,9 @@ char *	xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
 void
 xfs_error_test_init(void)
 {
-	bzero(xfs_etest, sizeof(xfs_etest));
-	bzero(xfs_etest_fsid, sizeof(xfs_etest_fsid));
-	bzero(xfs_etest_fsname, sizeof(xfs_etest_fsname));
+	memset(xfs_etest, 0, sizeof(xfs_etest));
+	memset(xfs_etest_fsid, 0, sizeof(xfs_etest_fsid));
+	memset(xfs_etest_fsname, 0, sizeof(xfs_etest_fsname));
 }
 
 int
@@ -84,12 +84,12 @@ xfs_error_test(int error_tag, int *fsidp, char *expression,
 	if (random() % randfactor)
 		return 0;
 
-	bcopy(fsidp, &fsid, sizeof(fsid_t));
+	memcpy(&fsid, fsidp, sizeof(fsid_t));
 
 	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
 		if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) {
 			cmn_err(CE_WARN,
-	"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"\n",
+	"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
 				expression, file, line, xfs_etest_fsname[i]);
 			return 1;
 		}
@@ -105,7 +105,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
 	int len;
 	int64_t fsid;
 
-	bcopy(mp->m_fixedfsid, &fsid, sizeof(fsid_t));
+	memcpy(&fsid, mp->m_fixedfsid, sizeof(fsid_t));
 
 	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
 		if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
@@ -138,7 +138,7 @@ xfs_errortag_clear(int error_tag, xfs_mount_t *mp)
 	int i;
 	int64_t fsid;
 
-	bcopy(mp->m_fixedfsid, &fsid, sizeof(fsid_t));
+	memcpy(&fsid, mp->m_fixedfsid, sizeof(fsid_t));
 
 	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
 		if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
@@ -191,7 +191,7 @@ xfs_errortag_clearall(xfs_mount_t *mp)
 {
 	int64_t fsid;
 
-	bcopy(mp->m_fixedfsid, &fsid, sizeof(fsid_t));
+	memcpy(&fsid, mp->m_fixedfsid, sizeof(fsid_t));
 
 	return xfs_errortag_clearall_umount(fsid, mp->m_fsname, 1);
 }
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 7984d92618fd..0311d1258076 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -427,9 +427,9 @@ typedef struct xfs_handle {
 				 - (char *) &(handle))			  \
 				 + (handle).ha_fid.xfs_fid_len)
 
-#define XFS_HANDLE_CMP(h1, h2)	bcmp(h1, h2, sizeof (xfs_handle_t))
+#define XFS_HANDLE_CMP(h1, h2)	memcmp(h1, h2, sizeof(xfs_handle_t))
 
-#define FSHSIZE		sizeof (fsid_t)
+#define FSHSIZE		sizeof(fsid_t)
 
 
 /*
@@ -498,13 +498,5 @@ typedef struct xfs_handle {
 #define BTOBB(bytes)	(((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT)
 #define BTOBBT(bytes)	((__u64)(bytes) >> BBSHIFT)
 #define BBTOB(bbs)	((bbs) << BBSHIFT)
-#define OFFTOBB(bytes)	(((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT)
-#define OFFTOBBT(bytes) ((__u64)(bytes) >> BBSHIFT)
-#define BBTOOFF(bbs)	((__u64)(bbs) << BBSHIFT)
-
-#define SEEKLIMIT32	0x7fffffff
-#define BBSEEKLIMIT32	BTOBBT(SEEKLIMIT32)
-#define SEEKLIMIT	0x7fffffffffffffffLL
-#define BBSEEKLIMIT	OFFTOBBT(SEEKLIMIT)
 
 #endif	/* _LINUX_XFS_FS_H */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 4cd53ed2d791..aac21b2532a6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -149,7 +149,7 @@ xfs_growfs_data_private(
 			sizeof(xfs_perag_t) * nagcount,
 			sizeof(xfs_perag_t) * oagcount,
 			KM_SLEEP);
-		bzero(&mp->m_perag[oagcount],
+		memset(&mp->m_perag[oagcount], 0,
 			(nagcount - oagcount) * sizeof(xfs_perag_t));
 		mp->m_flags |= XFS_MOUNT_32BITINODES;
 		xfs_initialize_perag(mp, nagcount);
@@ -175,7 +175,7 @@ xfs_growfs_data_private(
 				  disk_addr,
 				  sectbb, 0);
 		agf = XFS_BUF_TO_AGF(bp);
-		bzero(agf, mp->m_sb.sb_sectsize);
+		memset(agf, 0, mp->m_sb.sb_sectsize);
 		INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC);
 		INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION);
 		INT_SET(agf->agf_seqno, ARCH_CONVERT, agno);
@@ -208,7 +208,7 @@ xfs_growfs_data_private(
 				  disk_addr,
 				  sectbb, 0);
 		agi = XFS_BUF_TO_AGI(bp);
-		bzero(agi, mp->m_sb.sb_sectsize);
+		memset(agi, 0, mp->m_sb.sb_sectsize);
 		INT_SET(agi->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC);
 		INT_SET(agi->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION);
 		INT_SET(agi->agi_seqno, ARCH_CONVERT, agno);
@@ -233,7 +233,7 @@ xfs_growfs_data_private(
 			disk_addr,
 			BTOBB(bsize), 0);
 		block = XFS_BUF_TO_SBLOCK(bp);
-		bzero(block, bsize);
+		memset(block, 0, bsize);
 		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTB_MAGIC);
 		INT_ZERO(block->bb_level, ARCH_CONVERT);
 		INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
@@ -255,7 +255,7 @@ xfs_growfs_data_private(
 			disk_addr,
 			BTOBB(bsize), 0);
 		block = XFS_BUF_TO_SBLOCK(bp);
-		bzero(block, bsize);
+		memset(block, 0, bsize);
 		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTC_MAGIC);
 		INT_ZERO(block->bb_level, ARCH_CONVERT);
 		INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
@@ -278,7 +278,7 @@ xfs_growfs_data_private(
 			disk_addr,
 			BTOBB(bsize), 0);
 		block = XFS_BUF_TO_SBLOCK(bp);
-		bzero(block, bsize);
+		memset(block, 0, bsize);
 		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
 		INT_ZERO(block->bb_level, ARCH_CONVERT);
 		INT_ZERO(block->bb_numrecs, ARCH_CONVERT);
@@ -353,7 +353,7 @@ xfs_growfs_data_private(
 				  sectbb, 0, &bp);
 		if (error) {
 			xfs_fs_cmn_err(CE_WARN, mp,
-			"error %d reading secondary superblock for ag %d\n",
+			"error %d reading secondary superblock for ag %d",
 				error, agno);
 			break;
 		}
@@ -368,7 +368,7 @@ xfs_growfs_data_private(
 			continue;
 		} else {
 			xfs_fs_cmn_err(CE_WARN, mp,
-		"write error %d updating secondary superblock for ag %d\n",
+		"write error %d updating secondary superblock for ag %d",
 				error, agno);
 			break; /* no point in continuing */
 		}
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 185e62f08c70..b8f68d7c3605 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -263,7 +263,7 @@ xfs_ialloc_ag_alloc(
 		INT_ZERO(dic.di_gid, ARCH_CONVERT);
 		INT_ZERO(dic.di_nlink, ARCH_CONVERT);
 		INT_ZERO(dic.di_projid, ARCH_CONVERT);
-		bzero(&(dic.di_pad[0]),sizeof(dic.di_pad));
+		memset(&(dic.di_pad[0]), 0, sizeof(dic.di_pad));
 		INT_SET(dic.di_atime.t_sec, ARCH_CONVERT, ztime.t_sec);
 		INT_SET(dic.di_atime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
 
@@ -287,7 +287,7 @@ xfs_ialloc_ag_alloc(
 
 		for (i = 0; i < ninodes; i++) {
 			free = XFS_MAKE_IPTR(args.mp, fbuf, i);
-			bcopy (&dic, &(free->di_core), sizeof(xfs_dinode_core_t));
+			memcpy(&(free->di_core), &dic, sizeof(xfs_dinode_core_t));
 			INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
 			xfs_ialloc_log_di(tp, fbuf, i,
 				XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 0feaf85a5455..bded6cfd75f3 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -139,9 +139,9 @@ xfs_inobt_delrec(
 		}
 #endif
 		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
-			ovbcopy(&kp[ptr], &kp[ptr - 1],
+			memmove(&kp[ptr - 1], &kp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*kp));
-			ovbcopy(&pp[ptr], &pp[ptr - 1],
+			memmove(&pp[ptr - 1], &pp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*pp));
 			xfs_inobt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
 			xfs_inobt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
@@ -154,7 +154,7 @@ xfs_inobt_delrec(
 	else {
 		rp = XFS_INOBT_REC_ADDR(block, 1, cur);
 		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
-			ovbcopy(&rp[ptr], &rp[ptr - 1],
+			memmove(&rp[ptr - 1], &rp[ptr],
 				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*rp));
 			xfs_inobt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
 		}
@@ -450,8 +450,8 @@ xfs_inobt_delrec(
 				return error;
 		}
 #endif
-		bcopy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp));
-		bcopy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp));
+		memcpy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp));
+		memcpy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp));
 		xfs_inobt_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
 				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_inobt_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
@@ -462,7 +462,7 @@ xfs_inobt_delrec(
 		 */
 		lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
-		bcopy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
+		memcpy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
 		xfs_inobt_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
 				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
 	}
@@ -690,9 +690,9 @@ xfs_inobt_insrec(
 				return error;
 		}
 #endif
-		ovbcopy(&kp[ptr - 1], &kp[ptr],
+		memmove(&kp[ptr], &kp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp));
-		ovbcopy(&pp[ptr - 1], &pp[ptr],
+		memmove(&pp[ptr], &pp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp));
 		/*
 		 * Now stuff the new data in, bump numrecs and log the new data.
@@ -711,7 +711,7 @@ xfs_inobt_insrec(
 		 * It's a leaf entry.  Make a hole for the new record.
 		 */
 		rp = XFS_INOBT_REC_ADDR(block, 1, cur);
-		ovbcopy(&rp[ptr - 1], &rp[ptr],
+		memmove(&rp[ptr], &rp[ptr - 1],
 			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
 		/*
 		 * Now stuff the new record in, bump numrecs
@@ -1170,12 +1170,12 @@ xfs_inobt_lshift(
 				return error;
 		}
 #endif
-		ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memmove(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 	} else {
-		ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
 		rkp = &key;
@@ -1421,8 +1421,8 @@ xfs_inobt_rshift(
 				return error;
 		}
 #endif
-		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 #ifdef DEBUG
 		if ((error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)))
 			return error;
@@ -1434,7 +1434,7 @@ xfs_inobt_rshift(
 	} else {
 		lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
-		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		*rrp = *lrp;
 		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
 		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
@@ -1562,8 +1562,8 @@ xfs_inobt_split(
 				return error;
 		}
 #endif
-		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
 		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		*keyp = *rkp;
@@ -1574,7 +1574,7 @@ xfs_inobt_split(
 	else {
 		lrp = XFS_INOBT_REC_ADDR(left, i, cur);
 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
-		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
 		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
 		keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
 	}
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 7a8b02b39557..52c1b10c2afd 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -31,6 +31,7 @@
  */
 
 #include <xfs.h>
+#include <linux/pagemap.h>
 
 
 /*
@@ -109,25 +110,36 @@ xfs_chash_free(xfs_mount_t *mp)
 	mp->m_chash = NULL;
 }
 
-
-static inline void
-xfs_iget_vnode_init(
+void
+xfs_revalidate_inode(
 	xfs_mount_t	*mp,
 	vnode_t		*vp,
 	xfs_inode_t	*ip)
 {
-	vp->v_vfsp  = XFS_MTOVFS(mp);
-	vp->v_type  = IFTOVT(ip->i_d.di_mode);
+	struct inode	*inode = LINVFS_GET_IP(vp);
 
-	/* If we have a real type for an on-disk inode, we can set ops(&unlock)
-	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
-	 */
-	if (vp->v_type != VNON) {
-		linvfs_set_inode_ops(LINVFS_GET_IP(vp));
+	inode->i_mode	= (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type);
+	inode->i_nlink	= ip->i_d.di_nlink;
+	inode->i_uid	= ip->i_d.di_uid;
+	inode->i_gid 	= ip->i_d.di_gid;
+	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+		inode->i_rdev	= NODEV;
+	} else {
+		xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
+		inode->i_rdev	= XFS_DEV_TO_KDEVT(dev);
 	}
+	inode->i_blksize = PAGE_CACHE_SIZE;
+	inode->i_generation = ip->i_d.di_gen;
+	inode->i_size	= ip->i_d.di_size;
+	inode->i_blocks =
+		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+	inode->i_atime	= ip->i_d.di_atime.t_sec;
+	inode->i_mtime	= ip->i_d.di_mtime.t_sec;
+	inode->i_ctime	= ip->i_d.di_ctime.t_sec;
+
+	vp->v_flag &= ~VMODIFIED;
 }
 
-
 /*
  * Look up an inode by number in the given file system.
  * The inode is looked up in the hash table for the file system
@@ -159,7 +171,7 @@ xfs_iget_vnode_init(
  * bno -- the block number starting the buffer containing the inode,
  *	  if known (as by bulkstat), else 0.
  */
-int
+STATIC int
 xfs_iget_core(
 	vnode_t		*vp,
 	xfs_mount_t	*mp,
@@ -205,16 +217,9 @@ again:
 					goto again;
 				}
 
-				xfs_iget_vnode_init(mp, vp, ip);
-
 				vn_trace_exit(vp, "xfs_iget.alloc",
 					(inst_t *)__return_address);
 
-				bhv_desc_init(&(ip->i_bhv_desc), ip, vp,
-							&xfs_vnodeops);
-				vn_bhv_insert_initial(VN_BHV_HEAD(vp),
-							&(ip->i_bhv_desc));
-
 				XFS_STATS_INC(xfsstats.xs_ig_found);
 
 				read_unlock(&ih->ih_lock);
@@ -240,18 +245,12 @@ again:
 				cmn_err(CE_PANIC,
 			"xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
 						inode_vp, vp);
-				BUG();
 			}
 
 			read_unlock(&ih->ih_lock);
 
 			XFS_STATS_INC(xfsstats.xs_ig_found);
 
-			/*
-			 * Make sure the vnode and the inode are hooked up
-			 */
-			xfs_iget_vnode_init(mp, vp, ip);
-
 finish_inode:
 			if (lock_flags != 0) {
 				xfs_ilock(ip, lock_flags);
@@ -288,19 +287,8 @@ finish_inode:
 		return error;
 	}
 
-	/*
-	 * Vnode provided by vn_initialize.
-	 */
-
-	xfs_iget_vnode_init(mp, vp, ip);
-
 	vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);
 
-	if (vp->v_fbhv == NULL) {
-		bhv_desc_init(&(ip->i_bhv_desc), ip, vp, &xfs_vnodeops);
-		vn_bhv_insert_initial(VN_BHV_HEAD(vp), &(ip->i_bhv_desc));
-	}
-
 	xfs_inode_lock_init(ip, vp);
 	xfs_iocore_inode_init(ip);
 
@@ -429,8 +417,11 @@ finish_inode:
 
 	*ipp = ip;
 
-	/* Update the linux inode */
-	error = vn_revalidate(vp, ATTR_COMM|ATTR_LAZY);
+	/*
+	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
+	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
+	 */
+	VFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1);
 
 	return 0;
 }
@@ -495,7 +486,6 @@ inode_allocate:
 			newnode = (ip->i_d.di_mode == 0);
 			if (newnode)
 				xfs_iocore_inode_reinit(ip);
-			vn_revalidate(vp, ATTR_COMM|ATTR_LAZY);
 			XFS_STATS_INC(xfsstats.xs_ig_found);
 			*ipp = ip;
 			error = 0;
@@ -506,7 +496,6 @@ inode_allocate:
 	return error;
 }
 
-
 /*
  * Do the setup for the various locks within the incore inode.
  */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f7c35e754899..bd343063b60d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -56,8 +56,9 @@ STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
  */
 STATIC void
 xfs_validate_extents(
-	xfs_bmbt_rec_32_t	*ep,
+	xfs_bmbt_rec_t		*ep,
 	int			nrecs,
+	int			disk,
 	xfs_exntfmt_t		fmt)
 {
 	xfs_bmbt_irec_t		irec;
@@ -65,15 +66,18 @@ xfs_validate_extents(
 	xfs_bmbt_rec_t		rec;
 
 	for (i = 0; i < nrecs; i++) {
-		bcopy(ep, &rec, sizeof(rec));
-		xfs_bmbt_get_all(&rec, &irec);
+		memcpy(&rec, ep, sizeof(rec));
+		if (disk)
+			xfs_bmbt_disk_get_all(&rec, &irec);
+		else
+			xfs_bmbt_get_all(&rec, &irec);
 		if (fmt == XFS_EXTFMT_NOSTATE)
 			ASSERT(irec.br_state == XFS_EXT_NORM);
 		ep++;
 	}
 }
 #else /* DEBUG */
-#define xfs_validate_extents(ep, nrecs, fmt)
+#define xfs_validate_extents(ep, nrecs, disk, fmt)
 #endif /* DEBUG */
 
 /*
@@ -551,7 +555,7 @@ xfs_iformat_local(
 	/*
 	 * If the size is unreasonable, then something
 	 * is wrong and we just bail out rather than crash in
-	 * kmem_alloc() or bcopy() below.
+	 * kmem_alloc() or memcpy() below.
 	 */
 	if (size > XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)) {
 		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
@@ -575,7 +579,8 @@ xfs_iformat_local(
 	ifp->if_bytes = size;
 	ifp->if_real_bytes = real_size;
 	if (size)
-		bcopy(XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), ifp->if_u1.if_data, size);
+		memcpy(ifp->if_u1.if_data,
+			XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), size);
 	ifp->if_flags &= ~XFS_IFEXTENTS;
 	ifp->if_flags |= XFS_IFINLINE;
 	return 0;
@@ -597,9 +602,10 @@ xfs_iformat_extents(
 	int		whichfork)
 {
 	xfs_ifork_t	*ifp;
-	int		nex;
+	int		nex, i;
 	int		real_size;
 	int		size;
+	xfs_bmbt_rec_t	*ep, *dp;
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	nex = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
@@ -608,7 +614,7 @@ xfs_iformat_extents(
 	/*
 	 * If the number of extents is unreasonable, then something
 	 * is wrong and we just bail out rather than crash in
-	 * kmem_alloc() or bcopy() below.
+	 * kmem_alloc() or memcpy() below.
 	 */
 	if (size < 0 || size > XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)) {
 		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
@@ -632,10 +638,18 @@ xfs_iformat_extents(
 	ifp->if_real_bytes = real_size;
 	if (size) {
 		xfs_validate_extents(
-			(xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT),
-			nex, XFS_EXTFMT_INODE(ip));
-		bcopy(XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), ifp->if_u1.if_extents,
-		      size);
+			(xfs_bmbt_rec_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT),
+			nex, 1, XFS_EXTFMT_INODE(ip));
+		dp = (xfs_bmbt_rec_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+		ep = ifp->if_u1.if_extents;
+#if ARCH_CONVERT != ARCH_NOCONVERT
+		for (i = 0; i < nex; i++, ep++, dp++) {
+			ep->l0 = INT_GET(dp->l0, ARCH_CONVERT);
+			ep->l1 = INT_GET(dp->l1, ARCH_CONVERT);
+		}
+#else
+		memcpy(ep, dp, size);
+#endif
 		xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex,
 			whichfork);
 		if (whichfork != XFS_DATA_FORK ||
@@ -731,9 +745,9 @@ xfs_xlate_dinode_core(xfs_caddr_t buf, xfs_dinode_core_t *dip,
 
     if (arch == ARCH_NOCONVERT) {
 	if (dir>0) {
-	    bcopy((xfs_caddr_t)buf_core, (xfs_caddr_t)mem_core, sizeof(xfs_dinode_core_t));
+	    memcpy((xfs_caddr_t)mem_core, (xfs_caddr_t)buf_core, sizeof(xfs_dinode_core_t));
 	} else {
-	    bcopy((xfs_caddr_t)mem_core, (xfs_caddr_t)buf_core, sizeof(xfs_dinode_core_t));
+	    memcpy((xfs_caddr_t)buf_core, (xfs_caddr_t)mem_core, sizeof(xfs_dinode_core_t));
 	}
 	return;
     }
@@ -749,9 +763,9 @@ xfs_xlate_dinode_core(xfs_caddr_t buf, xfs_dinode_core_t *dip,
     INT_XLATE(buf_core->di_projid,	mem_core->di_projid,	   dir, arch);
 
     if (dir>0) {
-	bcopy(buf_core->di_pad, mem_core->di_pad, sizeof(buf_core->di_pad));
+	memcpy(mem_core->di_pad, buf_core->di_pad, sizeof(buf_core->di_pad));
     } else {
-	bcopy(mem_core->di_pad, buf_core->di_pad, sizeof(buf_core->di_pad));
+	memcpy(buf_core->di_pad, mem_core->di_pad, sizeof(buf_core->di_pad));
     }
 
     INT_XLATE(buf_core->di_atime.t_sec, mem_core->di_atime.t_sec,  dir, arch);
@@ -978,8 +992,8 @@ xfs_iread_extents(
 		ifp->if_flags &= ~XFS_IFEXTENTS;
 		return error;
 	}
-	xfs_validate_extents((xfs_bmbt_rec_32_t *)ifp->if_u1.if_extents,
-		XFS_IFORK_NEXTENTS(ip, whichfork), XFS_EXTFMT_INODE(ip));
+	xfs_validate_extents((xfs_bmbt_rec_t *)ifp->if_u1.if_extents,
+		XFS_IFORK_NEXTENTS(ip, whichfork), 0, XFS_EXTFMT_INODE(ip));
 	return 0;
 }
 
@@ -1015,7 +1029,7 @@ xfs_ialloc(
 	xfs_inode_t	*pip,
 	mode_t		mode,
 	nlink_t		nlink,
-	dev_t		rdev,
+	xfs_dev_t	rdev,
 	cred_t		*cr,
 	xfs_prid_t	prid,
 	int		okalloc,
@@ -1065,10 +1079,7 @@ xfs_ialloc(
 	ip->i_d.di_uid = current->fsuid;
 	ip->i_d.di_gid = current->fsgid;
 	ip->i_d.di_projid = prid;
-	bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
-
-	/* now that we have a v_type we can set Linux inode ops (& unlock) */
-	linvfs_set_inode_ops(LINVFS_GET_IP(XFS_ITOV(ip)));
+	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 
 	/*
 	 * If the superblock version is up to where we support new format
@@ -1100,14 +1111,13 @@ xfs_ialloc(
 
 	/*
 	 * If the group ID of the new file does not match the effective group
-	 * ID or one of the supplementary group IDs, the ISGID bit is
-	 * cleared if the "irixsgid" mount option is set.
+	 * ID or one of the supplementary group IDs, the ISGID bit is cleared
+	 * (and only if the irix_sgid_inherit compatibility variable is set).
 	 */
-	if (ip->i_d.di_mode & ISGID) {
-		if (!in_group_p((gid_t)ip->i_d.di_gid)
-		    && (ip->i_mount->m_flags & XFS_MOUNT_IRIXSGID)) {
-			ip->i_d.di_mode &= ~ISGID;
-		}
+	if ((irix_sgid_inherit) &&
+	    (ip->i_d.di_mode & ISGID) &&
+	    (!in_group_p((gid_t)ip->i_d.di_gid))) {
+		ip->i_d.di_mode &= ~ISGID;
 	}
 
 	ip->i_d.di_size = 0;
@@ -1128,7 +1138,7 @@ xfs_ialloc(
 	case IFBLK:
 	case IFSOCK:
 		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
-		ip->i_df.if_u2.if_rdev = IRIX_MKDEV(MAJOR(rdev), MINOR(rdev));
+		ip->i_df.if_u2.if_rdev = rdev;
 		ip->i_df.if_flags = 0;
 		flags |= XFS_ILOG_DEV;
 		break;
@@ -1172,6 +1182,10 @@ xfs_ialloc(
 	 * Log the new values stuffed into the inode.
 	 */
 	xfs_trans_log_inode(tp, ip, flags);
+
+	/* now that we have a v_type we can set Linux inode ops (& unlock) */
+	VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
+
 	*ipp = ip;
 	return 0;
 }
@@ -1714,7 +1728,7 @@ xfs_igrow_start(
 	 * and any blocks between the old and new file sizes.
 	 */
 	error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize,
-				new_size, NULL);
+				new_size);
 	return error;
 }
 
@@ -2156,7 +2170,7 @@ xfs_iroot_realloc(
 		ifp->if_broot_bytes = (int)new_size;
 		ASSERT(ifp->if_broot_bytes <=
 			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
-		ovbcopy(op, np, cur_max * (uint)sizeof(xfs_dfsbno_t));
+		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
 		return;
 	}
 
@@ -2178,7 +2192,7 @@ xfs_iroot_realloc(
 		/*
 		 * First copy over the btree block header.
 		 */
-		bcopy(ifp->if_broot, new_broot, sizeof(xfs_bmbt_block_t));
+		memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t));
 	} else {
 		new_broot = NULL;
 		ifp->if_flags &= ~XFS_IFBROOT;
@@ -2195,7 +2209,7 @@ xfs_iroot_realloc(
 						     ifp->if_broot_bytes);
 		np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1,
 						     (int)new_size);
-		bcopy(op, np, new_max * (uint)sizeof(xfs_bmbt_rec_t));
+		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
 
 		/*
 		 * Then copy the pointers.
@@ -2204,7 +2218,7 @@ xfs_iroot_realloc(
 						     ifp->if_broot_bytes);
 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1,
 						     (int)new_size);
-		bcopy(op, np, new_max * (uint)sizeof(xfs_dfsbno_t));
+		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
 	}
 	kmem_free(ifp->if_broot, ifp->if_broot_bytes);
 	ifp->if_broot = new_broot;
@@ -2268,8 +2282,8 @@ xfs_iext_realloc(
 			 * so the if_extents pointer is null.
 			 */
 			if (ifp->if_u1.if_extents) {
-				bcopy(ifp->if_u1.if_extents,
-				      ifp->if_u2.if_inline_ext, new_size);
+				memcpy(ifp->if_u2.if_inline_ext,
+					ifp->if_u1.if_extents, new_size);
 				kmem_free(ifp->if_u1.if_extents,
 					  ifp->if_real_bytes);
 			}
@@ -2286,7 +2300,7 @@ xfs_iext_realloc(
 		if (ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext) {
 			ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
 				kmem_alloc(rnew_size, KM_SLEEP);
-			bcopy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
+			memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
 			      sizeof(ifp->if_u2.if_inline_ext));
 		} else if (rnew_size != ifp->if_real_bytes) {
 			ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
@@ -2349,7 +2363,7 @@ xfs_idata_realloc(
 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
 		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
 			ASSERT(ifp->if_real_bytes != 0);
-			bcopy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
+			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
 			      new_size);
 			kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
@@ -2382,8 +2396,8 @@ xfs_idata_realloc(
 		} else {
 			ASSERT(ifp->if_real_bytes == 0);
 			ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
-			bcopy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
-			      ifp->if_bytes);
+			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
+				ifp->if_bytes);
 		}
 	}
 	ifp->if_real_bytes = real_size;
@@ -2610,17 +2624,17 @@ xfs_iunpin_wait(
  * returns the number of bytes copied into the buffer.
  *
  * If there are no delayed allocation extents, then we can just
- * bcopy() the extents into the buffer.	 Otherwise, we need to
+ * memcpy() the extents into the buffer.  Otherwise, we need to
  * examine each extent in turn and skip those which are delayed.
  */
 int
 xfs_iextents_copy(
 	xfs_inode_t		*ip,
-	xfs_bmbt_rec_32_t	*buffer,
+	xfs_bmbt_rec_t		*buffer,
 	int			whichfork)
 {
 	int			copied;
-	xfs_bmbt_rec_32_t	*dest_ep;
+	xfs_bmbt_rec_t		*dest_ep;
 	xfs_bmbt_rec_t		*ep;
 #ifdef XFS_BMAP_TRACE
 	static char		fname[] = "xfs_iextents_copy";
@@ -2637,28 +2651,13 @@ xfs_iextents_copy(
 	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 	xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork);
 	ASSERT(nrecs > 0);
-	if (nrecs == XFS_IFORK_NEXTENTS(ip, whichfork)) {
-		/*
-		 * There are no delayed allocation extents,
-		 * so just copy everything.
-		 */
-		ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
-		ASSERT(ifp->if_bytes ==
-		       (XFS_IFORK_NEXTENTS(ip, whichfork) *
-			(uint)sizeof(xfs_bmbt_rec_t)));
-		bcopy(ifp->if_u1.if_extents, buffer, ifp->if_bytes);
-		xfs_validate_extents(buffer, nrecs, XFS_EXTFMT_INODE(ip));
-		return ifp->if_bytes;
-	}
 
-	ASSERT(whichfork == XFS_DATA_FORK);
 	/*
 	 * There are some delayed allocation extents in the
 	 * inode, so copy the extents one at a time and skip
 	 * the delayed ones.  There must be at least one
 	 * non-delayed extent.
 	 */
-	ASSERT(nrecs > ip->i_d.di_nextents);
 	ep = ifp->if_u1.if_extents;
 	dest_ep = buffer;
 	copied = 0;
@@ -2672,15 +2671,19 @@ xfs_iextents_copy(
 			continue;
 		}
 
-		*dest_ep = *(xfs_bmbt_rec_32_t *)ep;
+#if ARCH_CONVERT != ARCH_NOCONVERT
+		/* Translate to on disk format */
+		dest_ep->l0 = INT_GET(ep->l0, ARCH_CONVERT);
+		dest_ep->l1 = INT_GET(ep->l1, ARCH_CONVERT);
+#else
+		*dest_ep = *ep;
+#endif
 		dest_ep++;
 		ep++;
 		copied++;
 	}
 	ASSERT(copied != 0);
-	ASSERT(copied == ip->i_d.di_nextents);
-	ASSERT((copied * (uint)sizeof(xfs_bmbt_rec_t)) <= XFS_IFORK_DSIZE(ip));
-	xfs_validate_extents(buffer, copied, XFS_EXTFMT_INODE(ip));
+	xfs_validate_extents(buffer, copied, 1, XFS_EXTFMT_INODE(ip));
 
 	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
 }
@@ -2736,7 +2739,7 @@ xfs_iflush_fork(
 		    (ifp->if_bytes > 0)) {
 			ASSERT(ifp->if_u1.if_data != NULL);
 			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
-			bcopy(ifp->if_u1.if_data, cp, ifp->if_bytes);
+			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
 		}
 		if (whichfork == XFS_DATA_FORK) {
 			if (XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip)) {
@@ -2753,7 +2756,7 @@ xfs_iflush_fork(
 		if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
 		    (ifp->if_bytes > 0)) {
 			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
-			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_32_t *)cp,
+			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
 				whichfork);
 		}
 		break;
@@ -2781,7 +2784,7 @@ xfs_iflush_fork(
 	case XFS_DINODE_FMT_UUID:
 		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
 			ASSERT(whichfork == XFS_DATA_FORK);
-			bcopy(&ip->i_df.if_u2.if_uuid, &dip->di_u.di_muuid,
+			memcpy(&dip->di_u.di_muuid, &ip->i_df.if_u2.if_uuid,
 				sizeof(uuid_t));
 		}
 		break;
@@ -3208,8 +3211,8 @@ xfs_iflush_int(
 			INT_SET(dip->di_core.di_version, ARCH_CONVERT, XFS_DINODE_VERSION_2);
 			ip->i_d.di_onlink = 0;
 			INT_ZERO(dip->di_core.di_onlink, ARCH_CONVERT);
-			bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
-			bzero(&(dip->di_core.di_pad[0]),
+			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
+			memset(&(dip->di_core.di_pad[0]), 0,
 			      sizeof(dip->di_core.di_pad));
 			ASSERT(ip->i_d.di_projid == 0);
 		}
@@ -3377,7 +3380,7 @@ xfs_iflush_all(
 			 * entry in the list anyway so we'll know below
 			 * whether we reached the end or not.
 			 */
-			VMAP(vp, ip, vmap);
+			VMAP(vp, vmap);
 			vp->v_flag |= VPURGE;		/* OK for vn_purge */
 			XFS_MOUNT_IUNLOCK(mp);
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 308bc90d825f..38669ac946f0 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -98,7 +98,6 @@ struct xfs_inode_log_item;
 struct xfs_mount;
 struct xfs_trans;
 struct xfs_dquot;
-struct pm;
 
 
 /*
@@ -119,23 +118,6 @@ typedef struct xfs_gap {
 	xfs_extlen_t	xg_count_fsb;
 } xfs_gap_t;
 
-/*
- * This structure is used to hold common pieces of the buffer
- * and file for xfs_dio_write and xfs_dio_read.
- */
-typedef struct xfs_dio {
-	struct xfs_buf	*xd_bp;
-	bhv_desc_t	*xd_bdp;
-	struct xfs_inode *xd_ip;
-	struct xfs_iocore *xd_io;
-	struct cred	*xd_cr;
-	struct pm	*xd_pmp;
-	int		xd_blkalgn;
-	int		xd_ioflag;
-	xfs_off_t	xd_start;
-	size_t		xd_length;
-} xfs_dio_t;
-
 typedef struct dm_attrs_s {
 	__uint32_t	da_dmevmask;	/* DMIG event mask */
 	__uint16_t	da_dmstate;	/* DMIG state info */
@@ -516,7 +498,7 @@ int		xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			  xfs_inode_t **, xfs_daddr_t);
 int		xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
 int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, nlink_t,
-			   dev_t, struct cred *, xfs_prid_t, int,
+			   xfs_dev_t, struct cred *, xfs_prid_t, int,
 			   struct xfs_buf **, boolean_t *, xfs_inode_t **);
 void		xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *, int,
 			   xfs_arch_t);
@@ -538,7 +520,7 @@ void		xfs_iext_realloc(xfs_inode_t *, int, int);
 void		xfs_iroot_realloc(xfs_inode_t *, int, int);
 void		xfs_ipin(xfs_inode_t *);
 void		xfs_iunpin(xfs_inode_t *);
-int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_32_t *, int);
+int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
 int		xfs_iflush(xfs_inode_t *, uint);
 int		xfs_iflush_all(struct xfs_mount *, int);
 int		xfs_ibusy_check(xfs_inode_t *, int);
@@ -550,7 +532,7 @@ void		xfs_lock_inodes(xfs_inode_t **, int, int, uint);
 
 #define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
 
-
+void xfs_revalidate_inode(struct xfs_mount *, vnode_t *vp, xfs_inode_t *);
 
 #ifdef DEBUG
 void		xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 9beacce25dec..d23d596d7973 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -96,7 +96,7 @@ xfs_inode_item_size(
 			if (iip->ili_root_size > 0) {
 				ASSERT(iip->ili_root_size ==
 				       ip->i_df.if_broot_bytes);
-				ASSERT(bcmp(iip->ili_orig_root,
+				ASSERT(memcmp(iip->ili_orig_root,
 					    ip->i_df.if_broot,
 					    iip->ili_root_size) == 0);
 			} else {
@@ -214,7 +214,7 @@ xfs_inode_item_format(
 	xfs_log_iovec_t		*vecp;
 	xfs_inode_t		*ip;
 	size_t			data_bytes;
-	xfs_bmbt_rec_32_t	*ext_buffer;
+	xfs_bmbt_rec_t		*ext_buffer;
 	int			nrecs;
 	xfs_mount_t		*mp;
 
@@ -297,7 +297,7 @@ xfs_inode_item_format(
 			 */
 			ip->i_d.di_version = XFS_DINODE_VERSION_2;
 			ip->i_d.di_onlink = 0;
-			bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
+			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 		}
 	}
 
@@ -314,6 +314,7 @@ xfs_inode_item_format(
 			nrecs = ip->i_df.if_bytes /
 				(uint)sizeof(xfs_bmbt_rec_t);
 			ASSERT(nrecs > 0);
+#if ARCH_CONVERT == ARCH_NOCONVERT
 			if (nrecs == ip->i_d.di_nextents) {
 				/*
 				 * There are no delayed allocation
@@ -323,10 +324,14 @@ xfs_inode_item_format(
 				vecp->i_addr =
 					(char *)(ip->i_df.if_u1.if_extents);
 				vecp->i_len = ip->i_df.if_bytes;
-			} else {
+			} else 
+#endif
+			{
 				/*
 				 * There are delayed allocation extents
-				 * in the inode.  Use xfs_iextents_copy()
+				 * in the inode, or we need to convert
+				 * the extents to on disk format.
+				 * Use xfs_iextents_copy()
 				 * to copy only the real extents into
 				 * a separate buffer.  We'll free the
 				 * buffer in the unlock routine.
@@ -336,7 +341,7 @@ xfs_inode_item_format(
 				iip->ili_extents_buf = ext_buffer;
 				vecp->i_addr = (xfs_caddr_t)ext_buffer;
 				vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
-					XFS_DATA_FORK);
+						XFS_DATA_FORK);
 			}
 			ASSERT(vecp->i_len <= ip->i_df.if_bytes);
 			iip->ili_format.ilf_dsize = vecp->i_len;
@@ -428,6 +433,7 @@ xfs_inode_item_format(
 		ASSERT(!(iip->ili_format.ilf_fields &
 			 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
 		if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
+			ASSERT(!(iip->ili_format.ilf_fields & XFS_ILOG_DEXT));
 			ASSERT(ip->i_afp->if_bytes > 0);
 			ASSERT(ip->i_afp->if_u1.if_extents != NULL);
 			ASSERT(ip->i_d.di_anextents > 0);
@@ -437,12 +443,25 @@ xfs_inode_item_format(
 #endif
 			ASSERT(nrecs > 0);
 			ASSERT(nrecs == ip->i_d.di_anextents);
+#if ARCH_CONVERT == ARCH_NOCONVERT
 			/*
 			 * There are not delayed allocation extents
 			 * for attributes, so just point at the array.
 			 */
 			vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents);
 			vecp->i_len = ip->i_afp->if_bytes;
+#else
+			ASSERT(iip->ili_aextents_buf == NULL);
+			/*
+			 * Need to endian flip before logging; size the
+			 * buffer by the attr fork, which is what gets copied.
+			 */
+			ext_buffer = kmem_alloc(ip->i_afp->if_bytes, KM_SLEEP);
+			iip->ili_aextents_buf = ext_buffer;
+			vecp->i_addr = (xfs_caddr_t)ext_buffer;
+			vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
+					XFS_ATTR_FORK);
+#endif
 			iip->ili_format.ilf_asize = vecp->i_len;
 			vecp++;
 			nvecs++;
@@ -630,7 +649,6 @@ xfs_inode_item_unlock(
 	 * If the inode needed a separate buffer with which to log
 	 * its extents, then free it now.
 	 */
-	/* FIXME */
 	if (iip->ili_extents_buf != NULL) {
 		ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
 		ASSERT(ip->i_d.di_nextents > 0);
@@ -639,6 +657,14 @@ xfs_inode_item_unlock(
 		kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes);
 		iip->ili_extents_buf = NULL;
 	}
+	if (iip->ili_aextents_buf != NULL) {
+		ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
+		ASSERT(ip->i_d.di_anextents > 0);
+		ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
+		ASSERT(ip->i_afp->if_bytes > 0);
+		kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes);
+		iip->ili_aextents_buf = NULL;
+	}
 
 	/*
 	 * Figure out if we should unlock the inode or not.
@@ -889,7 +915,7 @@ xfs_inode_item_init(
 	iip->ili_inode = ip;
 
 	/*
-	   We have bzeroed memory. No need ...
+	   We have zeroed memory. No need ...
 	   iip->ili_extents_buf = NULL;
 	   iip->ili_pushbuf_flag = 0;
 	 */
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index d90407088842..4970205a5e69 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -141,7 +141,10 @@ typedef struct xfs_inode_log_item {
 	unsigned short		ili_flags;	   /* misc flags */
 	unsigned short		ili_logged;	   /* flushed logged data */
 	unsigned int		ili_last_fields;   /* fields when flushed */
-	struct xfs_bmbt_rec_32	*ili_extents_buf;  /* array of logged exts */
+	struct xfs_bmbt_rec_32	*ili_extents_buf;  /* array of logged
+						      data exts */
+	struct xfs_bmbt_rec_32	*ili_aextents_buf; /* array of logged
+						      attr exts */
 	unsigned int		ili_pushbuf_flag;  /* one bit used in push_ail */
 
 #ifdef DEBUG
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index d46f2ccf3844..8fe8097215df 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -146,7 +146,7 @@ xfs_bulkstat_one(
 	buf->bs_extsize = INT_GET(dic->di_extsize, arch) << mp->m_sb.sb_blocklog;
 	buf->bs_extents = INT_GET(dic->di_nextents, arch);
 	buf->bs_gen = INT_GET(dic->di_gen, arch);
-	bzero(buf->bs_pad, sizeof(buf->bs_pad));
+	memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
 	buf->bs_dmevmask = INT_GET(dic->di_dmevmask, arch);
 	buf->bs_dmstate = INT_GET(dic->di_dmstate, arch);
 	buf->bs_aextents = INT_GET(dic->di_anextents, arch);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 942d07386d9e..d156b9cb7a7f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1586,8 +1586,8 @@ xlog_unalloc_log(xlog_t *log)
  *		1. If first write of transaction, write start record
  *		2. Write log operation header (header per region)
  *		3. Find out if we can fit entire region into this iclog
- *		4. Potentially, verify destination bcopy ptr
- *		5. Bcopy (partial) region
+ *		4. Potentially, verify destination memcpy ptr
+ *		5. Memcpy (partial) region
  *		6. If partial copy, release iclog; otherwise, continue
  *			copying more regions into current iclog
  *	4. Mark want sync bit (in simulation mode)
@@ -1628,8 +1628,8 @@ xlog_write(xfs_mount_t *	mp,
     int		     start_rec_copy; /* # bytes to copy for start record */
     int		     partial_copy;   /* did we split a region? */
     int		     partial_copy_len;/* # bytes copied if split region */
-    int		     need_copy;	     /* # bytes need to bcopy this region */
-    int		     copy_len;	     /* # bytes actually bcopy'ing */
+    int		     need_copy;	     /* # bytes need to memcpy this region */
+    int		     copy_len;	     /* # bytes actually memcpy'ing */
     int		     copy_off;	     /* # bytes from entry start */
     int		     contwr;	     /* continued write of in-core log? */
     int		     firstwr = 0;    /* first write of transaction */
@@ -1733,7 +1733,7 @@ xlog_write(xfs_mount_t *	mp,
 
 	    /* Partial write last time? => (partial_copy != 0)
 	     * need_copy is the amount we'd like to copy if everything could
-	     * fit in the current bcopy.
+	     * fit in the current memcpy.
 	     */
 	    need_copy = reg[index].i_len - partial_copy_len;
 
@@ -1759,7 +1759,7 @@ xlog_write(xfs_mount_t *	mp,
 
 	    /* copy region */
 	    ASSERT(copy_len >= 0);
-	    bcopy(reg[index].i_addr + copy_off, (xfs_caddr_t)ptr, copy_len);
+	    memcpy((xfs_caddr_t)ptr, reg[index].i_addr + copy_off, copy_len);
 	    xlog_write_adv_cnt(ptr, len, log_offset, copy_len);
 
 	    /* make copy_len total bytes copied, including headers */
@@ -1836,7 +1836,7 @@ xlog_state_clean_log(xlog_t *log)
 				changed = 2;
 			}
 			INT_ZERO(iclog->ic_header.h_num_logops, ARCH_CONVERT);
-			bzero(iclog->ic_header.h_cycle_data,
+			memset(iclog->ic_header.h_cycle_data, 0,
 			      sizeof(iclog->ic_header.h_cycle_data));
 			INT_ZERO(iclog->ic_header.h_lsn, ARCH_CONVERT);
 		} else if (iclog->ic_state == XLOG_STATE_ACTIVE)
@@ -2064,7 +2064,7 @@ xlog_state_do_callback(
 		} while (first_iclog != iclog);
 		if (repeats && (repeats % 10) == 0) {
 			xfs_fs_cmn_err(CE_WARN, log->l_mp,
-				"xlog_state_do_callback: looping %d\n", repeats);
+				"xlog_state_do_callback: looping %d", repeats);
 		}
 	} while (!ioerrors && loopdidcallbacks);
 
@@ -2155,20 +2155,13 @@ xlog_state_done_syncing(
 		iclog->ic_state = XLOG_STATE_DONE_SYNC;
 	}
 
-	/*
-	 * Someone could be sleeping on the next iclog even though it is
-	 * in the ACTIVE state.	 We kick off one thread to force the
-	 * iclog buffer out.
-	 */
-	if (iclog->ic_next->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
-		sv_signal(&iclog->ic_next->ic_forcesema);
 	LOG_UNLOCK(log, s);
 	xlog_state_do_callback(log, aborted, iclog);	/* also cleans log */
 }	/* xlog_state_done_syncing */
 
 
 /*
- * Update counters atomically now that bcopy is done.
+ * Update counters atomically now that memcpy is done.
  */
 /* ARGSUSED */
 static inline void
@@ -2984,11 +2977,9 @@ xlog_state_sync(xlog_t	  *log,
 		uint	  flags)
 {
     xlog_in_core_t	*iclog;
-    int			already_slept = 0;
     SPLDECL(s);
 
 
-try_again:
     s = LOG_LOCK(log);
     iclog = log->l_iclog;
 
@@ -3009,39 +3000,12 @@ try_again:
 	}
 
 	if (iclog->ic_state == XLOG_STATE_ACTIVE) {
-		/*
-		 * We sleep here if we haven't already slept (e.g.
-		 * this is the first time we've looked at the correct
-		 * iclog buf) and the buffer before us is going to
-		 * be sync'ed.	We have to do that to ensure that the
-		 * log records go out in the proper order.  When it's
-		 * done, someone waiting on this buffer will be woken up
-		 * (maybe us) to flush this buffer out.
-		 *
-		 * Otherwise, we mark the buffer WANT_SYNC, and bump
-		 * up the refcnt so we can release the log (which drops
-		 * the ref count).  The state switch keeps new transaction
-		 * commits from using this buffer.  When the current commits
-		 * finish writing into the buffer, the refcount will drop to
-		 * zero and the buffer will go out then.
-		 */
-		if (!already_slept &&
-		    (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC |
-						 XLOG_STATE_SYNCING))) {
-			ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
-			XFS_STATS_INC(xfsstats.xs_log_force_sleep);
-			sv_wait(&iclog->ic_prev->ic_forcesema, PSWP,
-				&log->l_icloglock, s);
-			already_slept = 1;
-			goto try_again;
-		} else {
-			iclog->ic_refcnt++;
-			xlog_state_switch_iclogs(log, iclog, 0);
-			LOG_UNLOCK(log, s);
-			if (xlog_state_release_iclog(log, iclog))
-				return XFS_ERROR(EIO);
-			s = LOG_LOCK(log);
-		}
+		iclog->ic_refcnt++;
+		xlog_state_switch_iclogs(log, iclog, 0);
+		LOG_UNLOCK(log, s);
+		if (xlog_state_release_iclog(log, iclog))
+			return XFS_ERROR(EIO);
+		s = LOG_LOCK(log);
 	}
 
 	if ((flags & XFS_LOG_SYNC) && /* sleep */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8d1676e1d157..125af631e3f2 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -476,7 +476,7 @@ xlog_find_head(xlog_t  *log,
 	     * mkfs etc write a dummy unmount record to a fresh
 	     * log so we can store the uuid in there
 	     */
-	    xlog_warn("XFS: totally zeroed log\n");
+	    xlog_warn("XFS: totally zeroed log");
 	}
 
 	return 0;
@@ -873,9 +873,19 @@ xlog_find_tail(xlog_t  *log,
 	 * overwrite the unmount record after a clean unmount.
 	 *
 	 * Do this only if we are going to recover the filesystem
+	 *
+	 * NOTE: This used to say "if (!readonly)"
+	 * However on Linux, we can & do recover a read-only filesystem.
+	 * We only skip recovery if NORECOVERY is specified on mount,
+	 * in which case we would not be here.
+	 *
+	 * But... if the -device- itself is readonly, just skip this.
+	 * We can't recover this device anyway, so it won't matter.
 	 */
-	if (!readonly)
+
+	if (!bdev_read_only(log->l_mp->m_logdev_targp->pbr_bdev)) {
 		error = xlog_clear_stale_blocks(log, tail_lsn);
+	}
 #endif
 
 bread_err:
@@ -1242,7 +1252,7 @@ xlog_recover_add_to_cont_trans(xlog_recover_t	*trans,
 		/* finish copying rest of trans header */
 		xlog_recover_add_item(&trans->r_itemq);
 		ptr = (xfs_caddr_t)&trans->r_theader+sizeof(xfs_trans_header_t)-len;
-		bcopy(dp, ptr, len); /* s, d, l */
+		memcpy(ptr, dp, len); /* d, s, l */
 		return 0;
 	}
 	item = item->ri_prev;
@@ -1251,7 +1261,7 @@ xlog_recover_add_to_cont_trans(xlog_recover_t	*trans,
 	old_len = item->ri_buf[item->ri_cnt-1].i_len;
 
 	ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);
-	bcopy(dp , &ptr[old_len], len); /* s, d, l */
+	memcpy(&ptr[old_len], dp, len); /* d, s, l */
 	item->ri_buf[item->ri_cnt-1].i_len += len;
 	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
 	return 0;
@@ -1282,7 +1292,7 @@ xlog_recover_add_to_trans(xlog_recover_t	*trans,
 	if (!len)
 		return 0;
 	ptr = kmem_zalloc(len, 0);
-	bcopy(dp, ptr, len);
+	memcpy(ptr, dp, len);
 
 	in_f = (xfs_inode_log_format_t *)ptr;
 	item = trans->r_itemq;
@@ -1290,7 +1300,7 @@ xlog_recover_add_to_trans(xlog_recover_t	*trans,
 		ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
 		if (len == sizeof(xfs_trans_header_t))
 			xlog_recover_add_item(&trans->r_itemq);
-		bcopy(dp, &trans->r_theader, len); /* s, d, l */
+		memcpy(&trans->r_theader, dp, len); /* d, s, l */
 		return 0;
 	}
 	if (item->ri_prev->ri_total != 0 &&
@@ -1799,9 +1809,10 @@ xlog_recover_do_reg_buffer(xfs_mount_t		*mp,
 					       "dquot_buf_recover");
 		}
 		if (!error)
-		    bcopy(item->ri_buf[i].i_addr,		   /* source */
-		      xfs_buf_offset(bp, (uint)bit << XFS_BLI_SHIFT), /* dest */
-		      nbits<<XFS_BLI_SHIFT);			   /* length */
+			memcpy(xfs_buf_offset(bp,
+					(uint)bit << XFS_BLI_SHIFT),	/* dest */
+				item->ri_buf[i].i_addr,			/* source */
+				nbits<<XFS_BLI_SHIFT);			/* length */
 		i++;
 		bit += nbits;
 	}
@@ -2115,9 +2126,9 @@ xlog_recover_do_inode_trans(xlog_t		*log,
 			      -1, ARCH_CONVERT);
 	/* the rest is in on-disk format */
 	if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) {
-		bcopy(item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t),
-		      (xfs_caddr_t) dip		 + sizeof(xfs_dinode_core_t),
-		      item->ri_buf[1].i_len  - sizeof(xfs_dinode_core_t));
+		memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t),
+			item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t),
+			item->ri_buf[1].i_len  - sizeof(xfs_dinode_core_t));
 	}
 
 	fields = in_f->ilf_fields;
@@ -2143,7 +2154,7 @@ xlog_recover_do_inode_trans(xlog_t		*log,
 	switch (fields & XFS_ILOG_DFORK) {
 	case XFS_ILOG_DDATA:
 	case XFS_ILOG_DEXT:
-		bcopy(src, &dip->di_u, len);
+		memcpy(&dip->di_u, src, len);
 		break;
 
 	case XFS_ILOG_DBROOT:
@@ -2182,7 +2193,7 @@ xlog_recover_do_inode_trans(xlog_t		*log,
 		case XFS_ILOG_AEXT:
 			dest = XFS_DFORK_APTR(dip);
 			ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
-			bcopy(src, dest, len);
+			memcpy(dest, src, len);
 			break;
 
 		case XFS_ILOG_ABROOT:
@@ -2341,7 +2352,7 @@ xlog_recover_do_dquot_trans(xlog_t		*log,
 		return XFS_ERROR(EIO);
 	}
 
-	bcopy(recddq, ddq, item->ri_buf[1].i_len);
+	memcpy(ddq, recddq, item->ri_buf[1].i_len);
 
 	ASSERT(dq_f->qlf_size == 2);
 	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
@@ -2382,7 +2393,7 @@ xlog_recover_do_efi_trans(xlog_t		*log,
 
 	mp = log->l_mp;
 	efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
-	bcopy((char *)efi_formatp, (char *)&(efip->efi_format),
+	memcpy((char *)&(efip->efi_format), (char *)efi_formatp,
 	      sizeof(xfs_efi_log_format_t) +
 	      ((efi_formatp->efi_nextents - 1) * sizeof(xfs_extent_t)));
 	efip->efi_next_extent = efi_formatp->efi_nextents;
@@ -3131,7 +3142,7 @@ xlog_unpack_data(xlog_rec_header_t *rhead,
 "XFS: Disregard message if filesystem was created with non-DEBUG kernel");
 		    if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
 			    cmn_err(CE_DEBUG,
-				"XFS: LogR this is a LogV2 filesystem\n");
+				"XFS: LogR this is a LogV2 filesystem");
 		    }
 		    log->l_flags |= XLOG_CHKSUM_MISMATCH;
 	    }
@@ -3215,7 +3226,7 @@ xlog_do_recovery_pass(xlog_t	*log,
 	return ENOMEM;
     }
 
-    bzero(rhash, sizeof(rhash));
+    memset(rhash, 0, sizeof(rhash));
     if (tail_blk <= head_blk) {
 	for (blk_no = tail_blk; blk_no < head_blk; ) {
 	    if ((error = xlog_bread(log, blk_no, hblks, hbp)))
@@ -3521,17 +3532,20 @@ xlog_recover(xlog_t *log, int readonly)
 		 * error message.
 		 * ...but this is no longer true.  Now, unless you specify
 		 * NORECOVERY (in which case this function would never be
-		 * called), it enables read-write access long enough to do
-		 * recovery.
+		 * called), we just go ahead and recover.  We do this all
+		 * under the vfs layer, so we can get away with it unless
+		 * the device itself is read-only, in which case we fail.
 		 */
-		if (readonly) {
 #ifdef __KERNEL__
-			if ((error = xfs_recover_read_only(log)))
-				return error;
+		if ((error = xfs_dev_is_read_only(log->l_mp,
+						"recovery required"))) {
+			return error;
+		}
 #else
+		if (readonly) {
 			return ENOSPC;
-#endif
 		}
+#endif
 
 #ifdef __KERNEL__
 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
@@ -3548,8 +3562,6 @@ xlog_recover(xlog_t *log, int readonly)
 #endif
 		error = xlog_do_recover(log, head_blk, tail_blk);
 		log->l_flags |= XLOG_RECOVERY_NEEDED;
-		if (readonly)
-			XFS_MTOVFS(log->l_mp)->vfs_flag |= VFS_RDONLY;
 	}
 	return error;
 }	/* xlog_recover */
@@ -3607,7 +3619,7 @@ xlog_recover_finish(xlog_t *log, int mfsi_flags)
 		log->l_flags &= ~XLOG_RECOVERY_NEEDED;
 	} else {
 		cmn_err(CE_DEBUG,
-			"!Ending clean XFS mount for filesystem: %s\n",
+			"!Ending clean XFS mount for filesystem: %s",
 			log->l_mp->m_fsname);
 	}
 	return 0;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 13c88ba8039e..2625226c73c7 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -31,6 +31,13 @@
  */
 
 #include <xfs.h>
+#include <linux/major.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+
+#ifndef EVMS_MAJOR
+#define EVMS_MAJOR	117
+#endif
 
 STATIC void	xfs_mount_reset_sbqflags(xfs_mount_t *);
 STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
@@ -375,9 +382,9 @@ xfs_xlatesb(
 		    size == 1 ||
 		    xfs_sb_info[f].type == 1) {
 			if (dir > 0) {
-				bcopy(buf_ptr + first, mem_ptr + first, size);
+				memcpy(mem_ptr + first, buf_ptr + first, size);
 			} else {
-				bcopy(mem_ptr + first, buf_ptr + first, size);
+				memcpy(buf_ptr + first, mem_ptr + first, size);
 			}
 		} else {
 			switch (size) {
@@ -673,7 +680,7 @@ xfs_mountfs(
 		}
 		uuid_mounted=1;
 		ret64 = uuid_hash64(&sbp->sb_uuid);
-		bcopy(&ret64, &vfsp->vfs_fsid, sizeof(ret64));
+		memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
 	}
 
 	/*
@@ -904,7 +911,7 @@ xfs_mountfs(
 	rvp = XFS_ITOV(rip);
 	if ((rip->i_d.di_mode & IFMT) != IFDIR) {
 		cmn_err(CE_WARN, "XFS: corrupted root inode");
-		VMAP(rvp, rip, vmap);
+		VMAP(rvp, vmap);
 		prdev("Root inode %llu is not a directory",
 		      mp->m_dev, (unsigned long long)rip->i_ino);
 		rvp->v_flag |= VPURGE;
@@ -930,8 +937,7 @@ xfs_mountfs(
 
 	if (((quotaondisk && !XFS_IS_QUOTA_ON(mp)) ||
 	      (!quotaondisk && XFS_IS_QUOTA_ON(mp))) &&
-	    (bdev_read_only(mp->m_ddev_targp->pbr_bdev) ||
-	     bdev_read_only(mp->m_logdev_targp->pbr_bdev))) {
+	      xfs_dev_is_read_only(mp, "changing quota state")) {
 		cmn_err(CE_WARN,
 			"XFS: device %s is read-only, cannot change "
 			"quota state.  Please mount with%s quota option.",
@@ -952,7 +958,7 @@ xfs_mountfs(
 		 */
 		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
 		rvp->v_flag |= VPURGE;
-		VMAP(rvp, rip, vmap);
+		VMAP(rvp, vmap);
 		VN_RELE(rvp);
 		vn_purge(rvp, &vmap);
 		goto error3;
@@ -1023,14 +1029,12 @@ xfs_mountfs(
 	if (needquotamount) {
 		ASSERT(mp->m_qflags == 0);
 		mp->m_qflags = quotaflags;
-		rootqcheck = ((XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) &&
-				mp->m_dev == rootdev && needquotacheck);
-		if (rootqcheck && (error = xfs_quotacheck_read_only(mp)))
+		rootqcheck = (mp->m_dev == rootdev && needquotacheck);
+		if (rootqcheck && (error = xfs_dev_is_read_only(mp,
+					"quotacheck")))
 			goto error2;
 		if (xfs_qm_mount_quotas(mp))
 			xfs_mount_reset_sbqflags(mp);
-		if (rootqcheck)
-			XFS_MTOVFS(mp)->vfs_flag |= VFS_RDONLY;
 	}
 
 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
@@ -1135,7 +1139,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 	/*
 	 * clear all error tags on this filesystem
 	 */
-	bcopy(&(XFS_MTOVFS(mp)->vfs_fsid), &fsid, sizeof(int64_t));
+	memcpy(&fsid, &(XFS_MTOVFS(mp)->vfs_fsid), sizeof(int64_t));
 	(void) xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
 #endif
 
@@ -1149,15 +1153,17 @@ xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
 	int		have_logdev = (mp->m_logdev_targp != mp->m_ddev_targp);
 
 	if (mp->m_ddev_targp) {
-		pagebuf_lock_disable(mp->m_ddev_targp, 0);
+		xfs_free_buftarg(mp->m_ddev_targp);
 		mp->m_ddev_targp = NULL;
 	}
 	if (mp->m_rtdev_targp) {
-		pagebuf_lock_disable(mp->m_rtdev_targp, 1);
+		xfs_blkdev_put(mp->m_rtdev_targp->pbr_bdev);
+		xfs_free_buftarg(mp->m_rtdev_targp);
 		mp->m_rtdev_targp = NULL;
 	}
 	if (mp->m_logdev_targp && have_logdev) {
-		pagebuf_lock_disable(mp->m_logdev_targp, 1);
+		xfs_blkdev_put(mp->m_logdev_targp->pbr_bdev);
+		xfs_free_buftarg(mp->m_logdev_targp);
 		mp->m_logdev_targp = NULL;
 	}
 }
@@ -1725,3 +1731,105 @@ xfs_check_frozen(
 	if (level == XFS_FREEZE_TRANS)
 		atomic_inc(&mp->m_active_trans);
 }
+
+/*
+ * Look up the block device node named by `name' and open it
+ * read-write for filesystem use, handing it back through *bdevp.
+ *
+ * Returns 0 on success or a positive errno on failure.  path_lookup()
+ * and blkdev_get() hand back Linux-style negative errnos, so the value
+ * is negated on every exit path.
+ *
+ * NOTE(review): the inode found is not checked to be a block special
+ * file before its i_rdev is used - confirm whether an S_ISBLK test is
+ * wanted here.
+ */
+int
+xfs_blkdev_get(
+	const char		*name,
+	struct block_device	**bdevp)
+{
+	struct nameidata	nd;
+	int			error;
+
+	error = path_lookup(name, LOOKUP_FOLLOW, &nd);
+	if (error) {
+		printk("XFS: Invalid device [%s], error=%d\n",
+				name, error);
+		return -error;
+	}
+
+	/* I think we actually want bd_acquire here..  --hch */
+	*bdevp = bdget(kdev_t_to_nr(nd.dentry->d_inode->i_rdev));
+	if (*bdevp) {
+		error = blkdev_get(*bdevp, FMODE_READ|FMODE_WRITE, 0, BDEV_FS);
+	} else {
+		error = -ENOMEM;
+	}
+
+	/* The path was only needed to find the device number. */
+	path_release(&nd);
+	return -error;
+}
+
+/*
+ * Release a block device with blkdev_put(), using the same BDEV_FS
+ * kind that xfs_blkdev_get() opens devices with.
+ */
+void
+xfs_blkdev_put(
+	struct block_device	*bdev)
+{
+	blkdev_put(bdev, BDEV_FS);
+}
+
+/*
+ * Tear down a buffer target: flush its delayed-write buffers with
+ * PBDF_WAIT, then free the structure.  The underlying block device
+ * is not released here.
+ */
+void
+xfs_free_buftarg(
+	xfs_buftarg_t		*btp)
+{
+	pagebuf_delwri_flush(btp, PBDF_WAIT, NULL);
+	/* Allocated with kmem_zalloc(), so release with kmem_free(). */
+	kmem_free(btp, sizeof(*btp));
+}
+
+/*
+ * Allocate a buffer target for an already opened block device.
+ *
+ * The target records the device number, the block_device and the
+ * mapping of the device inode, with a block size of PAGE_CACHE_SIZE.
+ * MD and EVMS devices are flagged PBR_ALIGNED_ONLY and LVM devices
+ * PBR_SECTOR_ONLY.
+ */
+xfs_buftarg_t *
+xfs_alloc_buftarg(
+	struct block_device	*bdev)
+{
+	xfs_buftarg_t		*btp;
+
+	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+
+	btp->pbr_dev = bdev->bd_dev;
+	btp->pbr_bdev = bdev;
+	btp->pbr_mapping = bdev->bd_inode->i_mapping;
+	btp->pbr_blocksize = PAGE_CACHE_SIZE;
+
+	switch (MAJOR(btp->pbr_dev)) {
+	case MD_MAJOR:
+	case EVMS_MAJOR:
+		btp->pbr_flags = PBR_ALIGNED_ONLY;
+		break;
+	case LVM_BLK_MAJOR:
+		btp->pbr_flags = PBR_SECTOR_ONLY;
+		break;
+	default:
+		/* pbr_flags keeps the value kmem_zalloc() left in it */
+		break;
+	}
+
+	return btp;
+}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 51c86fea20c4..7011e001b6c3 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -77,7 +77,6 @@ struct xfs_inode;
 struct xfs_perag;
 struct xfs_quotainfo;
 struct xfs_iocore;
-struct xfs_dio;
 struct xfs_bmbt_irec;
 struct xfs_bmap_free;
 
@@ -96,24 +95,18 @@ struct xfs_bmap_free;
  * minimize the number of memory indirections involved.
  */
 
-typedef int		(*xfs_dio_write_t)(struct xfs_dio *);
-typedef int		(*xfs_dio_read_t)(struct xfs_dio *);
-typedef int		(*xfs_strat_write_t)(struct xfs_iocore *, struct xfs_buf *);
 typedef int		(*xfs_bmapi_t)(struct xfs_trans *, void *,
 				xfs_fileoff_t, xfs_filblks_t, int,
 				xfs_fsblock_t *, xfs_extlen_t,
 				struct xfs_bmbt_irec *, int *,
 				struct xfs_bmap_free *);
 typedef int		(*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *);
-typedef int		(*xfs_rsync_t)(void *, int, xfs_off_t, xfs_off_t);
-typedef uint		(*xfs_lck_map_shared_t)(void *);
 typedef void		(*xfs_lock_t)(void *, uint);
 typedef void		(*xfs_lock_demote_t)(void *, uint);
 typedef int		(*xfs_lock_nowait_t)(void *, uint);
 typedef void		(*xfs_unlk_t)(void *, unsigned int);
 typedef void		(*xfs_chgtime_t)(void *, int);
 typedef xfs_fsize_t	(*xfs_size_t)(void *);
-typedef xfs_fsize_t	(*xfs_setsize_t)(void *, xfs_off_t);
 typedef xfs_fsize_t	(*xfs_lastbyte_t)(void *);
 
 typedef struct xfs_ioops {
@@ -295,8 +288,7 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_NOUUID	0x00004000	/* ignore uuid during mount */
 #define XFS_MOUNT_32BITINODES	0x00008000	/* do not create inodes above
 						 * 32 bits in size */
-#define XFS_MOUNT_IRIXSGID	0x00010000	/* Irix-style sgid inheritance */
-#define XFS_MOUNT_NOLOGFLUSH	0x00020000
+#define XFS_MOUNT_NOLOGFLUSH	0x00010000
 
 /*
  * Flags for m_cxfstype
@@ -436,6 +428,11 @@ int		xfs_syncsub(xfs_mount_t *, int, int, int *);
 void		xfs_initialize_perag(xfs_mount_t *, int);
 void		xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t, __int64_t);
 
+int		xfs_blkdev_get(const char *, struct block_device **);
+void		xfs_blkdev_put(struct block_device *);
+struct xfs_buftarg *xfs_alloc_buftarg(struct block_device *);
+void		xfs_free_buftarg(struct xfs_buftarg *);
+
 /*
  * Flags for freeze operations.
  */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index db7f44f0eb52..b728d271c7ee 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1039,14 +1039,14 @@ xfs_qm_unmount(
 		vp = XFS_ITOV(XFS_QI_UQIP(mp));
 		VN_RELE(vp);
 		if (vn_count(vp) > 1)
-			cmn_err(CE_WARN, "UQUOTA busy vp=0x%x count=%d\n",
+			cmn_err(CE_WARN, "UQUOTA busy vp=0x%x count=%d",
 				vp, vn_count(vp));
 	}
 	if (XFS_IS_GQUOTA_ON(mp)) {
 		vp = XFS_ITOV(XFS_QI_GQIP(mp));
 		VN_RELE(vp);
 		if (vn_count(vp) > 1)
-			cmn_err(CE_WARN, "GQUOTA busy vp=0x%x count=%d\n",
+			cmn_err(CE_WARN, "GQUOTA busy vp=0x%x count=%d",
 				vp, vn_count(vp));
 	}
 
@@ -1427,9 +1427,9 @@ xfs_qm_qino_alloc(
 		xfs_trans_cancel(tp, 0);
 		return (error);
 	}
-	bzero(&zerocr, sizeof(zerocr));
+	memset(&zerocr, 0, sizeof(zerocr));
 
-	if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, IFREG, 1, mp->m_dev,
+	if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, IFREG, 1, 0,
 				   &zerocr, 0, 1, ip, &committed))) {
 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
 				 XFS_TRANS_ABORT);
@@ -1998,11 +1998,16 @@ xfs_qm_init_quotainos(
 	int		error;
 	__int64_t	sbflags;
 	uint		flags;
+	int		readonly;
+	vfs_t		*vfsp;
 
 	ASSERT(mp->m_quotainfo);
 	uip = gip = NULL;
+	error = 0;
 	sbflags = 0;
 	flags = 0;
+	vfsp = XFS_MTOVFS(mp);
+	readonly = vfsp->vfs_flag & VFS_RDONLY;
 
 	/*
 	 * Get the uquota and gquota inodes
@@ -2036,36 +2041,34 @@ xfs_qm_init_quotainos(
 	 * made above will get added to a transaction and logged in one of
 	 * the qino_alloc calls below.
 	 */
+
 	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
-		if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
-			return XFS_ERROR(EROFS);
 		if ((error = xfs_qm_qino_alloc(mp, &uip,
 					      sbflags | XFS_SB_UQUOTINO,
 					      flags | XFS_QMOPT_UQUOTA)))
-			return XFS_ERROR(error);
+			goto error;
 
 		flags &= ~XFS_QMOPT_SBVERSION;
 	}
 	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
-		if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) {
-			if (uip)
-				VN_RELE(XFS_ITOV(uip));
-			return XFS_ERROR(EROFS);
-		}
 		if ((error = xfs_qm_qino_alloc(mp, &gip,
 					      sbflags | XFS_SB_GQUOTINO,
 					      flags | XFS_QMOPT_GQUOTA))) {
 			if (uip)
 				VN_RELE(XFS_ITOV(uip));
 
-			return XFS_ERROR(error);
+			goto error;
 		}
 	}
 
 	XFS_QI_UQIP(mp) = uip;
 	XFS_QI_GQIP(mp) = gip;
 
-	return (0);
+error:
+	if (readonly)
+		vfsp->vfs_flag |= VFS_RDONLY;
+
+	return XFS_ERROR(error);
 }
 
 
@@ -2414,11 +2417,11 @@ xfs_qm_dqalloc_incore(
 		if ((dqp = xfs_qm_dqreclaim_one())) {
 			XFS_STATS_INC(xfsstats.xs_qm_dqreclaims);
 			/*
-			 * Just bzero the core here. The rest will get
+			 * Just zero the core here. The rest will get
 			 * reinitialized by caller. XXX we shouldn't even
-			 * do this bzero ...
+			 * do this zero ...
 			 */
-			bzero(&dqp->q_core, sizeof(dqp->q_core));
+			memset(&dqp->q_core, 0, sizeof(dqp->q_core));
 			*O_dqpp = dqp;
 			return (B_FALSE);
 		}
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 73f22a296ec8..b3cfba7ecad0 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -550,7 +550,7 @@ xfs_qm_scall_getqstat(
 
 	uip = gip = NULL;
 	tempuqip = tempgqip = B_FALSE;
-	bzero(out, sizeof(fs_quota_stat_t));
+	memset(out, 0, sizeof(fs_quota_stat_t));
 
 	out->qs_version = FS_QSTAT_VERSION;
 	if (! XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
@@ -885,7 +885,7 @@ xfs_qm_export_dquot(
 	xfs_disk_dquot_t	*src,
 	struct fs_disk_quota	*dst)
 {
-	bzero(dst, sizeof(*dst));
+	memset(dst, 0, sizeof(*dst));
 	dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
 	dst->d_flags =
 		xfs_qm_export_qtype_flags(INT_GET(src->d_flags, ARCH_CONVERT));
@@ -1060,7 +1060,7 @@ again:
 			 * Sample vp mapping while holding the mplock, lest
 			 * we come across a non-existent vnode.
 			 */
-			VMAP(vp, ip, vmap);
+			VMAP(vp, vmap);
 			ireclaims = mp->m_ireclaims;
 			topino = mp->m_inodes;
 			XFS_MOUNT_IUNLOCK(mp);
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
index c7f8bb60e561..d016d2b6cf8a 100644
--- a/fs/xfs/xfs_quota_priv.h
+++ b/fs/xfs/xfs_quota_priv.h
@@ -181,7 +181,7 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
 	  vmap_t dqvmap;		\
 	  vnode_t *dqvp;		\
 	  dqvp = XFS_ITOV(ip);		\
-	  VMAP(dqvp, ip, dqvmap);	\
+	  VMAP(dqvp, dqvmap);		\
 	  VN_RELE(dqvp);		\
 	}
 
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 82684350d6eb..7f5526f4417c 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -170,7 +170,7 @@ xfs_growfs_rt_alloc(
 				error = XFS_ERROR(EIO);
 				goto error_exit;
 			}
-			bzero(XFS_BUF_PTR(bp), mp->m_sb.sb_blocksize);
+			memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize);
 			xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 			/*
 			 * Commit the transaction.
@@ -2322,7 +2322,7 @@ xfs_rtmount_inodes(
 		vmap_t		vmap;		/* vmap to delete vnode */
 
 		rbmvp = XFS_ITOV(mp->m_rbmip);
-		VMAP(rbmvp, mp->m_rbmip, vmap);
+		VMAP(rbmvp, vmap);
 		VN_RELE(rbmvp);
 		vn_purge(rbmvp, &vmap);
 		return error;
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 0076a84485e8..01dc65dc1158 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -230,7 +230,7 @@ xfs_ioerror_alert(
 	xfs_daddr_t		blkno)
 {
 	cmn_err(CE_ALERT,
- "I/O error in filesystem (\"%s\") meta-data dev 0x%x block 0x%llx\n"
+ "I/O error in filesystem (\"%s\") meta-data dev 0x%x block 0x%llx"
  "	 (\"%s\") error %d buf count %u",
 		(!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
 		XFS_BUF_TARGET_DEV(bp),
@@ -362,7 +362,7 @@ xfs_inval_cached_pages(
 		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
 		isize = XFS_SIZE(mp, io);
 		if (offset > isize) {
-			xfs_zero_eof(vp, io, offset, isize, offset, NULL);
+			xfs_zero_eof(vp, io, offset, isize, offset);
 		}
 		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
 	}
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index f2f4c5d88738..96bf21108a24 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -42,8 +42,6 @@ struct xfs_inode;
 struct xfs_iocore;
 struct xfs_mount;
 struct xfs_trans;
-struct xfs_dio;
-struct pm;
 
 /*
  * Maximum count of bmaps used by read and write paths.
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 292ad257bc38..6e8dd532c1dc 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -788,6 +788,7 @@ shut_us_down:
 	commit_lsn = xfs_log_done(mp, tp->t_ticket, log_flags);
 #endif
 
+	tp->t_commit_lsn = commit_lsn;
 	if (nvec > XFS_TRANS_LOGVEC_COUNT) {
 		kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t));
 	}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 1845dd874a4b..5489e4dded40 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -378,7 +378,10 @@ typedef struct xfs_trans {
 	unsigned int		t_rtx_res_used; /* # of resvd rt extents used */
 	xfs_log_ticket_t	t_ticket;	/* log mgr ticket */
 	sema_t			t_sema;		/* sema for commit completion */
-	xfs_lsn_t		t_lsn;		/* log seq num of trans commit*/
+	xfs_lsn_t		t_lsn;		/* log seq num of start of
+						 * transaction. */
+	xfs_lsn_t		t_commit_lsn;	/* log seq num of end of 
+						 * transaction. */
 	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
 	struct xfs_dquot_acct	*t_dqinfo;	/* accting info for dquots */
 	xfs_trans_callback_t	t_callback;	/* transaction callback */
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index e6bcf374e54c..8daceb174c4f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -472,7 +472,7 @@ shutdown_abort:
 	 */
 #if defined(DEBUG)
 	if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
-		cmn_err(CE_NOTE, "about to pop assert, bp == 0x%x\n", bp);
+		cmn_err(CE_NOTE, "about to pop assert, bp == 0x%x", bp);
 #endif
 	ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) !=
 						(XFS_B_STALE|XFS_B_DELWRI));
@@ -880,7 +880,7 @@ xfs_trans_binval(
 	bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY);
 	bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
 	bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
-	bzero((char *)(bip->bli_format.blf_data_map),
+	memset((char *)(bip->bli_format.blf_data_map), 0,
 	      (bip->bli_format.blf_map_size * sizeof(uint)));
 	lidp->lid_flags |= XFS_LID_DIRTY;
 	tp->t_flags |= XFS_TRANS_DIRTY;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index b7fbf81b00da..0df7fc3075cf 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -414,20 +414,8 @@ xfs_trans_inode_broot_debug(
 		iip->ili_root_size = ip->i_df.if_broot_bytes;
 		iip->ili_orig_root =
 			(char*)kmem_alloc(iip->ili_root_size, KM_SLEEP);
-		bcopy((char*)(ip->i_df.if_broot), iip->ili_orig_root,
+		memcpy(iip->ili_orig_root, (char*)(ip->i_df.if_broot),
 		      iip->ili_root_size);
 	}
 }
 #endif
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index aa6fe38dde5e..e08f8b727a65 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -308,23 +308,23 @@ extern struct xfsstats xfsstats;
 #endif	/* !CONFIG_PROC_FS */
 
 
-
-/* juggle IRIX device numbers - still used in ondisk structures */
-
-#ifndef __KERNEL__
-#define MKDEV(major, minor)	makedev(major, minor)
-#endif
-
-#define IRIX_DEV_BITSMAJOR	14
-#define IRIX_DEV_BITSMINOR	18
-#define IRIX_DEV_MAXMAJ		0x1ff
-#define IRIX_DEV_MAXMIN		0x3ffff
-#define IRIX_DEV_MAJOR(dev)	((int)(((unsigned)(dev)>>IRIX_DEV_BITSMINOR) \
-				    & IRIX_DEV_MAXMAJ))
-#define IRIX_DEV_MINOR(dev)	((int)((dev)&IRIX_DEV_MAXMIN))
-#define IRIX_MKDEV(major,minor) ((xfs_dev_t)(((major)<<IRIX_DEV_BITSMINOR) \
-				    | (minor&IRIX_DEV_MAXMIN)))
-
-#define IRIX_DEV_TO_KDEVT(dev)	MKDEV(IRIX_DEV_MAJOR(dev),IRIX_DEV_MINOR(dev))
+/*
+ * Juggle IRIX device numbers - still used in ondisk structures.
+ *
+ * The minor number occupies the low XFS_DEV_BITSMINOR bits of an
+ * xfs_dev_t and the major number is stored shifted above it.  Macro
+ * arguments are parenthesized so that expressions may be passed.
+ */
+#define XFS_DEV_BITSMAJOR	14
+#define XFS_DEV_BITSMINOR	18
+#define XFS_DEV_MAXMAJ		0x1ff
+#define XFS_DEV_MAXMIN		0x3ffff
+#define XFS_DEV_MAJOR(dev)	((int)(((unsigned)(dev)>>XFS_DEV_BITSMINOR) \
+				    & XFS_DEV_MAXMAJ))
+#define XFS_DEV_MINOR(dev)	((int)((dev)&XFS_DEV_MAXMIN))
+#define XFS_MKDEV(major,minor) ((xfs_dev_t)(((major)<<XFS_DEV_BITSMINOR) \
+				    | ((minor)&XFS_DEV_MAXMIN)))
+
+#define XFS_DEV_TO_KDEVT(dev)	mk_kdev(XFS_DEV_MAJOR(dev),XFS_DEV_MINOR(dev))
 
 #endif	/* !__XFS_TYPES_H */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 9fcee5b06cdc..d82bf3a6a7be 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -132,7 +132,7 @@ xfs_dir_ialloc(
 					   the inode. */
 	mode_t		mode,
 	nlink_t		nlink,
-	dev_t		rdev,
+	xfs_dev_t	rdev,
 	cred_t		*credp,
 	prid_t		prid,		/* project id */
 	int		okalloc,	/* ok to allocate new space */
@@ -345,7 +345,7 @@ xfs_bump_ino_vers2(
 
 	ip->i_d.di_version = XFS_DINODE_VERSION_2;
 	ip->i_d.di_onlink = 0;
-	bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
+	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 	mp = tp->t_mountp;
 	if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
 		s = XFS_SB_LOCK(mp);
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index db4da1029291..ac8f5b92ba0f 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -76,7 +76,7 @@ xfs_dir_ialloc(
 	struct xfs_inode	*dp,
 	mode_t			mode,
 	nlink_t			nlink,
-	dev_t			rdev,
+	xfs_dev_t		rdev,
 	struct cred		*credp,
 	prid_t			prid,
 	int			okalloc,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index cce4a74d5dfe..97f6f65feca6 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -225,7 +225,7 @@ xfs_start_flags(
 	    (ap->logbufs < XLOG_NUM_ICLOGS ||
 	     ap->logbufs > XLOG_MAX_ICLOGS)) {
 		cmn_err(CE_WARN, 
-			"XFS: invalid logbufs value: %d [not %d-%d]\n",
+			"XFS: invalid logbufs value: %d [not %d-%d]",
 			ap->logbufs, XLOG_NUM_ICLOGS, XLOG_MAX_ICLOGS);
 		return XFS_ERROR(EINVAL);
 	}
@@ -237,7 +237,7 @@ xfs_start_flags(
 	    ap->logbufsize != 128 * 1024 &&
 	    ap->logbufsize != 256 * 1024) {
 		cmn_err(CE_WARN,
-	"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]\n",
+	"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
 			ap->logbufsize);
 		return XFS_ERROR(EINVAL);
 	}
@@ -274,13 +274,9 @@ xfs_start_flags(
 	if (ap->flags & XFSMNT_OSYNCISOSYNC)
 		mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
 
-	/* Default on Linux */
-	if (1 || ap->flags & XFSMNT_32BITINODES)
+	if (ap->flags & XFSMNT_32BITINODES)
 		mp->m_flags |= XFS_MOUNT_32BITINODES;
 
-	if (ap->flags & XFSMNT_IRIXSGID)
-		mp->m_flags |= XFS_MOUNT_IRIXSGID;
-
 	if (ap->flags & XFSMNT_IOSIZE) {
 		if (ap->iosizelog > XFS_MAX_IO_LOG ||
 		    ap->iosizelog < XFS_MIN_IO_LOG) {
@@ -392,145 +388,115 @@ xfs_finish_flags(
 }
 
 /*
- * xfs_cmountfs
+ * xfs_mount
  *
- * This function is the common mount file system function for XFS.
+ * The file system configurations are:
+ *	(1) device (partition) with data and internal log
+ *	(2) logical volume with data and log subvolumes.
+ *	(3) logical volume with data, log, and realtime subvolumes.
+ *
+ * The Linux VFS took care of finding and opening the data volume for
+ * us.  We have to handle the other two (if present) here.
  */
 STATIC int
-xfs_cmountfs(
+xfs_mount(
 	vfs_t			*vfsp,
-	dev_t			ddev,
-	dev_t			logdev,
-	dev_t			rtdev,
-	struct xfs_mount_args	*ap,
-	struct cred		*cr)
+	struct xfs_mount_args	*args,
+	cred_t			*credp)
 {
 	xfs_mount_t		*mp;
+	struct block_device	*ddev, *logdev, *rtdev;
 	int			ronly = (vfsp->vfs_flag & VFS_RDONLY);
 	int			error = 0;
 
-	/*
-	 * Allocate VFS private data (xfs mount structure).
-	 */
-	mp = xfs_mount_init();
-
-	vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
+	ddev = vfsp->vfs_super->s_bdev;
+	logdev = rtdev = NULL;
 
 	/*
-	 * Open data, real time, and log devices now - order is important.
+	 * Open real time and log devices - order is important.
 	 */
-	mp->m_ddev_targp = pagebuf_lock_enable(ddev, 0);
-	if (IS_ERR(mp->m_ddev_targp)) {
-		error = PTR_ERR(mp->m_ddev_targp);
-		goto error2;
+	if (args->logname[0]) {
+		error = xfs_blkdev_get(args->logname, &logdev);
+		if (error)
+			return error;
 	}
-
-	if (rtdev != 0) {
-		mp->m_rtdev_targp = pagebuf_lock_enable(rtdev, 1);
-		if (IS_ERR(mp->m_rtdev_targp)) {
-			error = PTR_ERR(mp->m_rtdev_targp);
-			pagebuf_lock_disable(mp->m_ddev_targp, 0);
-			goto error2;
+	if (args->rtname[0]) {
+		error = xfs_blkdev_get(args->rtname, &rtdev);
+		if (error) {
+			xfs_blkdev_put(logdev);
+			return error;
 		}
 
 		if (rtdev == ddev || rtdev == logdev) {
 			cmn_err(CE_WARN,
 	"XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
-			error = EINVAL;
-			pagebuf_lock_disable(mp->m_ddev_targp, 0);
-			goto error2;
+			xfs_blkdev_put(logdev);
+			xfs_blkdev_put(rtdev);
+			return EINVAL;
 		}
-		
-		/* Set the realtime device's block size */
-		set_blocksize(mp->m_rtdev_targp->pbr_bdev, 512);
 	}
 
-	if (logdev != ddev) {
-		mp->m_logdev_targp = pagebuf_lock_enable(logdev, 1);
-		if (IS_ERR(mp->m_logdev_targp)) {
-			error = PTR_ERR(mp->m_logdev_targp);
-			pagebuf_lock_disable(mp->m_ddev_targp, 1);
-			if (mp->m_rtdev_targp)
-				pagebuf_lock_disable(mp->m_rtdev_targp, 1);
-			goto error2;
-		}
+	/*
+	 * Allocate VFS private data (xfs mount structure).
+	 */
+	mp = xfs_mount_init();
 
-		/* Set the log device's block size */
-		set_blocksize(mp->m_logdev_targp->pbr_bdev, 512);
+	vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
+
+	mp->m_ddev_targp = xfs_alloc_buftarg(ddev);
+	if (rtdev != NULL) {
+		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev);
+		set_blocksize(rtdev, 512);
+	}
+	if (logdev != NULL && logdev != ddev) {
+		mp->m_logdev_targp = xfs_alloc_buftarg(logdev);
+		set_blocksize(logdev, 512);
 	} else {
 		mp->m_logdev_targp = mp->m_ddev_targp;
 	}
 	
-	if ((error = xfs_start_flags(ap, mp, ronly)))
-		goto error3;
+	error = xfs_start_flags(args, mp, ronly);
+	if (error)
+		goto error;
 
-	if ((error = xfs_readsb(mp)))
-		goto error3;
+	error = xfs_readsb(mp);
+	if (error)
+		goto error;
 
-	if ((error = xfs_finish_flags(ap, mp, ronly))) {
+	error = xfs_finish_flags(args, mp, ronly);
+	if (error) {
 		xfs_freesb(mp);
-		goto error3;
+		goto error;
 	}
 
-	pagebuf_target_blocksize(mp->m_ddev_targp, mp->m_sb.sb_blocksize);
-	if (logdev != 0 && logdev != ddev)
-		pagebuf_target_blocksize(mp->m_logdev_targp,
-					mp->m_sb.sb_blocksize);
-	if (rtdev != 0)
-		pagebuf_target_blocksize(mp->m_rtdev_targp,
-					mp->m_sb.sb_blocksize);
+	mp->m_ddev_targp->pbr_blocksize = mp->m_sb.sb_blocksize;
+	if (logdev != 0 && logdev != ddev) {
+		mp->m_logdev_targp->pbr_blocksize = mp->m_sb.sb_blocksize;
+	}
+	if (rtdev != 0) {
+		mp->m_rtdev_targp->pbr_blocksize = mp->m_sb.sb_blocksize;
+	}
 
 	mp->m_cxfstype = XFS_CXFS_NOT;
-	error = xfs_mountfs(vfsp, mp, ddev, 0);
+	error = xfs_mountfs(vfsp, mp, ddev->bd_dev, 0);
 	if (error)
-		goto error3;
+		goto error;
 	return 0;
 
- error3:
-	/* It's impossible to get here before buftargs are filled */
+ error:
 	xfs_binval(mp->m_ddev_targp);
-	pagebuf_lock_disable(mp->m_ddev_targp, 0);
-	if (logdev && logdev != ddev) {
+	if (logdev != NULL && logdev != ddev) {
 		xfs_binval(mp->m_logdev_targp);
-		pagebuf_lock_disable(mp->m_logdev_targp, 1);
 	}
-	if (rtdev != 0) {
+	if (rtdev != NULL) {
 		xfs_binval(mp->m_rtdev_targp);
-		pagebuf_lock_disable(mp->m_rtdev_targp, 1);
-	}
- error2:
-	if (error) {
-		xfs_mount_free(mp, 1);
 	}
+	xfs_unmountfs_close(mp, NULL);
+	xfs_mount_free(mp, 1);
 	return error;
 }
 
 /*
- * xfs_mount
- *
- * The file system configurations are:
- *	(1) device (partition) with data and internal log
- *	(2) logical volume with data and log subvolumes.
- *	(3) logical volume with data, log, and realtime subvolumes.
- */
-STATIC int
-xfs_mount(
-	vfs_t			*vfsp,
-	struct xfs_mount_args	*args,
-	cred_t			*credp)
-{
-	dev_t		ddev;
-	dev_t		logdev;
-	dev_t		rtdev;
-	int		error;
-
-	error = spectodevs(vfsp->vfs_super, args, &ddev, &logdev, &rtdev);
-	if (!error)
-		error = xfs_cmountfs(vfsp, ddev, logdev, rtdev, args, credp);
-	return (error);
-}
-
-/*
  * xfs_ibusy searches for a busy inode in the mounted file system.
  *
  * Return 0 if there are no active inodes otherwise return 1.
@@ -1149,7 +1115,7 @@ xfs_syncsub(
 			 * in taking a snapshot of the vnode version number
 			 * for use in calling vn_get().
 			 */
-			VMAP(vp, ip, vmap);
+			VMAP(vp, vmap);
 			IPOINTER_INSERT(ip, mp);
 
 			vp = vn_get(vp, &vmap);
@@ -1601,6 +1567,39 @@ xfs_syncsub(
 	return XFS_ERROR(last_error);
 }
 
+STATIC void
+xfs_initialize_vnode(
+	bhv_desc_t	*bdp,
+	vnode_t		*vp,
+	bhv_desc_t	*inode_bhv,
+	int		unlock)
+{
+	xfs_inode_t	*ip = XFS_BHVTOI(inode_bhv);
+	struct inode	*inode = LINVFS_GET_IP(vp);
+
+	if (vp->v_fbhv == NULL) {
+		vp->v_vfsp = bhvtovfs(bdp);
+		bhv_desc_init(&(ip->i_bhv_desc), ip, vp, &xfs_vnodeops);
+		bhv_insert_initial(VN_BHV_HEAD(vp), &(ip->i_bhv_desc));
+	}
+
+	vp->v_type = IFTOVT(ip->i_d.di_mode);
+	/* If we are being called during the new inode create process,
+	 * it is too early to fill in the Linux inode, so stop here.
+	 */
+	if (vp->v_type == VNON)
+		return;
+
+	xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
+
+	/* For new inodes we need to set the ops vectors,
+	 * and unlock the inode.
+	 */
+	if (unlock && (inode->i_state & I_NEW)) {
+		linvfs_set_inode_ops(inode);
+		unlock_new_inode(inode);
+	}
+}
 
 /*
  * xfs_vget - called by DMAPI to get vnode from file handle
@@ -1653,11 +1652,6 @@ xfs_vget(
 	inode = LINVFS_GET_IP((*vpp));
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-	error = linvfs_revalidate_core(inode, ATTR_COMM);
-	if (error) {
-		iput(inode);
-		return XFS_ERROR(error);
-	}
 	return 0;
 }
 
@@ -1670,6 +1664,7 @@ vfsops_t xfs_vfsops = {
 	.vfs_statvfs		= xfs_statvfs,
 	.vfs_sync		= xfs_sync,
 	.vfs_vget		= xfs_vget,
+	.vfs_init_vnode		= xfs_initialize_vnode,
 	.vfs_force_shutdown	= xfs_do_force_shutdown,
 #ifdef CONFIG_XFS_DMAPI
 	.vfs_dmapi_mount	= xfs_dm_mount,
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 46c42a2cd8ed..07f1af20c108 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -208,7 +208,7 @@ xfs_getattr(
 				(mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
 		}
 	} else {
-		vap->va_rdev = IRIX_DEV_TO_KDEVT(ip->i_df.if_u2.if_rdev);
+		vap->va_rdev = ip->i_df.if_u2.if_rdev;
 		vap->va_blksize = BLKDEV_IOSIZE;
 	}
 
@@ -1970,7 +1970,7 @@ xfs_create(
 	vnode_t			*vp=NULL;
 	xfs_trans_t		*tp;
 	xfs_mount_t		*mp;
-	dev_t			rdev;
+	xfs_dev_t		rdev;
 	int			error;
 	xfs_bmap_free_t		free_list;
 	xfs_fsblock_t		first_block;
@@ -2955,8 +2955,7 @@ xfs_mkdir(
 	xfs_inode_t		*cdp;	/* inode of created dir */
 	vnode_t			*cvp;	/* vnode of created dir */
 	xfs_trans_t		*tp;
-	dev_t			rdev;
-	mode_t			mode;
+	xfs_dev_t		rdev;
 	xfs_mount_t		*mp;
 	int			cancel_flags;
 	int			error;
@@ -3062,8 +3061,9 @@ xfs_mkdir(
 	 * create the directory inode.
 	 */
 	rdev = (vap->va_mask & AT_RDEV) ? vap->va_rdev : 0;
-	mode = IFDIR | (vap->va_mode & ~IFMT);
-	error = xfs_dir_ialloc(&tp, dp, mode, 2, rdev, credp, prid, resblks > 0,
+	error = xfs_dir_ialloc(&tp, dp, 
+			MAKEIMODE(vap->va_type,vap->va_mode), 2,
+			rdev, credp, prid, resblks > 0,
 		&cdp, NULL);
 	if (error) {
 		if (error == ENOSPC)
@@ -3521,7 +3521,7 @@ xfs_symlink(
 	xfs_inode_t		*ip;
 	int			error;
 	int			pathlen;
-	dev_t			rdev;
+	xfs_dev_t		rdev;
 	xfs_bmap_free_t		free_list;
 	xfs_fsblock_t		first_block;
 	boolean_t		dp_joined_to_trans;
@@ -3702,7 +3702,7 @@ xfs_symlink(
 	 */
 	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
 		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
-		bcopy(target_path, ip->i_df.if_u1.if_data, pathlen);
+		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
 		ip->i_d.di_size = pathlen;
 
 		/*
@@ -3743,7 +3743,7 @@ xfs_symlink(
 			}
 			pathlen -= byte_cnt;
 
-			bcopy(cur_chunk, XFS_BUF_PTR(bp), byte_cnt);
+			memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
 			cur_chunk += byte_cnt;
 
 			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
@@ -3859,10 +3859,10 @@ xfs_fid2(
 	xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len);
 	xfid->fid_pad = 0;
 	/*
-	 * use bcopy because the inode is a long long and there's no
+	 * use memcpy because the inode is a long long and there's no
 	 * assurance that xfid->fid_ino is properly aligned.
 	 */
-	bcopy(&ip->i_ino, &xfid->fid_ino, sizeof xfid->fid_ino);
+	memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino));
 	xfid->fid_gen = ip->i_d.di_gen;
 
 	return 0;
@@ -4504,9 +4504,9 @@ xfs_zero_remaining_bytes(
 					  mp, bp, XFS_BUF_ADDR(bp));
 			break;
 		}
-		bzero(XFS_BUF_PTR(bp) +
+		memset(XFS_BUF_PTR(bp) +
 			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
-		      lastoffset - offset + 1);
+		      0, lastoffset - offset + 1);
 		XFS_BUF_UNDONE(bp);
 		XFS_BUF_UNREAD(bp);
 		XFS_BUF_WRITE(bp);
@@ -4937,7 +4937,6 @@ vnodeops_t xfs_vnodeops = {
 	.vop_rwlock		= xfs_rwlock,
 	.vop_rwunlock		= xfs_rwunlock,
 	.vop_bmap		= xfs_bmap,
-	.vop_strategy		= xfs_strategy,
 	.vop_reclaim		= xfs_reclaim,
 	.vop_attr_get		= xfs_attr_get,
 	.vop_attr_set		= xfs_attr_set,
diff --git a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
index 28ec0bd10af3..f4d1a6b4951e 100644
--- a/fs/xfs/xfsidbg.c
+++ b/fs/xfs/xfsidbg.c
@@ -53,6 +53,9 @@ static void	xfsidbg_xagf(xfs_agf_t *);
 static void	xfsidbg_xagi(xfs_agi_t *);
 static void	xfsidbg_xaildump(xfs_mount_t *);
 static void	xfsidbg_xalloc(xfs_alloc_arg_t *);
+#ifdef DEBUG
+static void	xfsidbg_xalmtrace(xfs_mount_t *);
+#endif
 static void	xfsidbg_xattrcontext(xfs_attr_list_context_t *);
 static void	xfsidbg_xattrleaf(xfs_attr_leafblock_t *);
 static void	xfsidbg_xattrsf(xfs_attr_shortform_t *);
@@ -196,6 +199,29 @@ static int	kdbm_xfs_xalloc(
 	return 0;
 }
 
+#ifdef DEBUG
+static int	kdbm_xfs_xalmtrace(
+	int	argc,
+	const char **argv,
+	const char **envp,
+	struct pt_regs *regs)
+{
+	unsigned long addr;
+	int nextarg = 1;
+	long offset = 0;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+	if (diag)
+		return diag;
+
+	xfsidbg_xalmtrace((xfs_mount_t *) addr);
+	return 0;
+}
+#endif /* DEBUG */
+
 static int	kdbm_xfs_xattrcontext(
 	int	argc,
 	const char **argv,
@@ -1700,18 +1726,19 @@ static int	kdbm_vn(
 /* pagebuf stuff */
 
 static char	*pb_flag_vals[] = {
-	"READ", "WRITE", "MAPPED", "PARTIAL",
-	"ASYNC", "NONE", "DELWRI", "FREED", "SYNC",
-	"MAPPABLE", "STALE", "FS_MANAGED", "RELEASE",
-	"LOCK", "TRYLOCK", "ALLOCATE", "FILE_ALLOCATE", "DONT_BLOCK",
-	"DIRECT", "LOCKABLE", "NEXT_KEY", "ENTER_PAGES",
-	"ALL_PAGES_MAPPED", "SOME_INVALID_PAGES", "ADDR_ALLOCATED",
-	"MEM_ALLOCATED", "GRIO", "FORCEIO", "SHUTDOWN",
-	NULL };
+/*  0 */ "READ", "WRITE", "MAPPED", "PARTIAL", "ASYNC",
+/*  5 */ "NONE", "DELWRI", "FREED", "SYNC", "MAPPABLE",
+/* 10 */ "STALE", "FS_MANAGED", "INVALID12", "LOCK", "TRYLOCK",
+/* 15 */ "FILE_ALLOCATE", "DONT_BLOCK", "DIRECT", "INVALID18", "LOCKABLE",
+/* 20 */ "PRIVATE_BH", "ALL_PAGES_MAPPED", "ADDR_ALLOCATED", "MEM_ALLOCATED",
+	 "FORCEIO",
+/* 25 */ "FLUSH", "READ_AHEAD", "INVALID27", "INVALID28", "INVALID29",
+/* 30 */ "INVALID30", "INVALID31",
+	 NULL };
 
 static char	*pbm_flag_vals[] = {
-	"EOF", "HOLE", "DELAY", "FLUSH_OVERLAPS",
-	"READAHEAD", "UNWRITTEN", "DONTALLOC", "NEW",
+	"EOF", "HOLE", "DELAY", "INVALID0x08",
+	"INVALID0x10", "UNWRITTEN", "INVALID0x40", "INVALID0x80",
 	NULL };
 
 
@@ -1964,7 +1991,7 @@ pb_trace_core(
 
 		if ((trace->event < EV_SIZE-1) && event_names[trace->event]) {
 			event = event_names[trace->event];
-		} else if (trace->event == EV_SIZE) {
+		} else if (trace->event == EV_SIZE-1) {
 			event = (char *)trace->misc;
 		} else {
 			event = value;
@@ -2081,6 +2108,10 @@ static struct xif {
 				"Dump XFS AIL for a mountpoint" },
   {  "xalloc",	kdbm_xfs_xalloc,	"<xfs_alloc_arg_t>",
 				"Dump XFS allocation args structure" },
+#ifdef DEBUG
+  {  "xalmtrc",	kdbm_xfs_xalmtrace,	"<xfs_mount_t>",
+				"Dump XFS alloc mount-point trace" },
+#endif
   {  "xattrcx", kdbm_xfs_xattrcontext,	"<xfs_attr_list_context_t>",
 				"Dump XFS attr_list context struct"},
   {  "xattrlf", kdbm_xfs_xattrleaf,	"<xfs_attr_leafblock_t>",
@@ -2245,13 +2276,14 @@ static char *xfs_alloctype[] = {
 /*
  * Prototypes for static functions.
  */
+#ifdef DEBUG
+static int xfs_alloc_trace_entry(ktrace_entry_t *ktep);
+#endif
 static void xfs_broot(xfs_inode_t *ip, xfs_ifork_t *f);
 static void xfs_btalloc(xfs_alloc_block_t *bt, int bsz);
 static void xfs_btbmap(xfs_bmbt_block_t *bt, int bsz);
 static void xfs_btino(xfs_inobt_block_t *bt, int bsz);
 static void xfs_buf_item_print(xfs_buf_log_item_t *blip, int summary);
-static void xfs_convert_extent(xfs_bmbt_rec_64_t *rp, xfs_dfiloff_t *op,
-			xfs_dfsbno_t *sp, xfs_dfilblks_t *cp, int *fp);
 static void xfs_dastate_path(xfs_da_state_path_t *p);
 static void xfs_dir2data(void *addr, int size);
 static void xfs_dir2leaf(xfs_dir2_leaf_t *leaf, int size);
@@ -2277,6 +2309,137 @@ static void xfs_xnode_fork(char *name, xfs_ifork_t *f);
  * Static functions.
  */
 
+#ifdef DEBUG
+/*
+ * Print xfs alloc trace buffer entry.
+ */
+static int
+xfs_alloc_trace_entry(ktrace_entry_t *ktep)
+{		  
+	static char *modagf_flags[] = {
+		"magicnum",
+		"versionnum",
+		"seqno",
+		"length",
+		"roots",
+		"levels",
+		"flfirst",
+		"fllast",
+		"flcount",
+		"freeblks",
+		"longest",
+		NULL
+	};
+
+	if (((__psint_t)ktep->val[0] & 0xffff) == 0)
+		return 0;
+	switch ((long)ktep->val[0] & 0xffffL) {
+	case XFS_ALLOC_KTRACE_ALLOC:
+		kdb_printf("alloc %s[%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		kdb_printf(
+	"agno %d agbno %d minlen %d maxlen %d mod %d prod %d minleft %d\n",
+			(__psunsigned_t)ktep->val[4],
+			(__psunsigned_t)ktep->val[5],
+			(__psunsigned_t)ktep->val[6], 
+			(__psunsigned_t)ktep->val[7], 
+			(__psunsigned_t)ktep->val[8],
+			(__psunsigned_t)ktep->val[9], 
+			(__psunsigned_t)ktep->val[10]);
+		kdb_printf("total %d alignment %d len %d type %s otype %s\n",
+			(__psunsigned_t)ktep->val[11],
+			(__psunsigned_t)ktep->val[12],
+			(__psunsigned_t)ktep->val[13],
+			xfs_alloctype[((__psint_t)ktep->val[14]) >> 16],
+			xfs_alloctype[((__psint_t)ktep->val[14]) & 0xffff]);
+		kdb_printf("wasdel %d wasfromfl %d isfl %d userdata %d\n",
+			((__psint_t)ktep->val[15] & (1 << 3)) != 0,
+			((__psint_t)ktep->val[15] & (1 << 2)) != 0,
+			((__psint_t)ktep->val[15] & (1 << 1)) != 0,
+			((__psint_t)ktep->val[15] & (1 << 0)) != 0);
+		break;
+	case XFS_ALLOC_KTRACE_FREE:
+		kdb_printf("free %s[%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		kdb_printf("agno %d agbno %d len %d isfl %d\n",
+			(__psunsigned_t)ktep->val[4],
+			(__psunsigned_t)ktep->val[5],
+			(__psunsigned_t)ktep->val[6],
+			(__psint_t)ktep->val[7]);
+		break;
+	case XFS_ALLOC_KTRACE_MODAGF:
+		kdb_printf("modagf %s[%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		printflags((__psint_t)ktep->val[4], modagf_flags, "modified");
+		kdb_printf("seqno %d length %d roots b %d c %d\n",
+			(__psunsigned_t)ktep->val[5],
+			(__psunsigned_t)ktep->val[6],
+			(__psunsigned_t)ktep->val[7],
+			(__psunsigned_t)ktep->val[8]);
+		kdb_printf("levels b %d c %d flfirst %d fllast %d flcount %d\n",
+			(__psunsigned_t)ktep->val[9],
+			(__psunsigned_t)ktep->val[10],
+			(__psunsigned_t)ktep->val[11],
+			(__psunsigned_t)ktep->val[12],
+			(__psunsigned_t)ktep->val[13]);
+		kdb_printf("freeblks %d longest %d\n",
+			(__psunsigned_t)ktep->val[14],
+			(__psunsigned_t)ktep->val[15]);
+		break;
+
+	case XFS_ALLOC_KTRACE_UNBUSY:
+		kdb_printf("unbusy %s [%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		kdb_printf("      agno %d slot %d tp 0x%x\n",
+			(__psunsigned_t)ktep->val[4],
+			(__psunsigned_t)ktep->val[7],
+			(__psunsigned_t)ktep->val[8]);
+		break;
+	case XFS_ALLOC_KTRACE_BUSY:
+		kdb_printf("busy %s [%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		kdb_printf("      agno %d agbno %d len %d slot %d tp 0x%x\n",
+			(__psunsigned_t)ktep->val[4],
+			(__psunsigned_t)ktep->val[5],
+			(__psunsigned_t)ktep->val[6],
+			(__psunsigned_t)ktep->val[7],
+			(__psunsigned_t)ktep->val[8]);
+		break;
+	case XFS_ALLOC_KTRACE_BUSYSEARCH:
+		kdb_printf("busy-search %s [%s %d] mp 0x%p\n",
+			(char *)ktep->val[1],
+			ktep->val[2] ? (char *)ktep->val[2] : "",
+			(__psint_t)ktep->val[0] >> 16,
+			(xfs_mount_t *)ktep->val[3]);
+		kdb_printf("      agno %d agbno %d len %d slot %d tp 0x%x\n",
+			(__psunsigned_t)ktep->val[4],
+			(__psunsigned_t)ktep->val[5],
+			(__psunsigned_t)ktep->val[6],
+			(__psunsigned_t)ktep->val[7],
+			(__psunsigned_t)ktep->val[8]);
+		break;
+	default:
+		kdb_printf("unknown alloc trace record\n");
+		break;
+	}
+	return 1;
+}
+#endif /* DEBUG */
 
 /*
  * Print an xfs in-inode bmap btree root.
@@ -2359,18 +2522,17 @@ xfs_btbmap(xfs_bmbt_block_t *bt, int bsz)
 	kdb_printf("rightsib %Lx\n", INT_GET(bt->bb_rightsib, ARCH_CONVERT));
 	if (INT_ISZERO(bt->bb_level, ARCH_CONVERT)) {
 		for (i = 1; i <= INT_GET(bt->bb_numrecs, ARCH_CONVERT); i++) {
-			xfs_bmbt_rec_64_t *r;
-			xfs_dfiloff_t o;
-			xfs_dfsbno_t s;
-			xfs_dfilblks_t c;
-			int fl;
+			xfs_bmbt_rec_t *r;
+			xfs_bmbt_irec_t	irec;
 
-			r = (xfs_bmbt_rec_64_t *)XFS_BTREE_REC_ADDR(bsz,
+			r = (xfs_bmbt_rec_t *)XFS_BTREE_REC_ADDR(bsz,
 				xfs_bmbt, bt, i, 0);
-			xfs_convert_extent(r, &o, &s, &c, &fl);
-			kdb_printf("rec %d startoff %Ld ", i, o);
-			kdb_printf("startblock %Lx ", s);
-			kdb_printf("blockcount %Ld flag %d\n", c, fl);
+
+			xfs_bmbt_disk_get_all((xfs_bmbt_rec_t *)r, &irec);
+			kdb_printf("rec %d startoff %Ld startblock %Lx blockcount %Ld flag %d\n",
+				i, irec.br_startoff,
+				(__uint64_t)irec.br_startblock, 
+				irec.br_blockcount, irec.br_state);
 		}
 	} else {
 		int mxr;
@@ -2473,31 +2635,6 @@ xfs_buf_item_print(xfs_buf_log_item_t *blip, int summary)
 }
 
 /*
- * Convert an external extent descriptor to internal form.
- */
-static void
-xfs_convert_extent(xfs_bmbt_rec_64_t *rp, xfs_dfiloff_t *op, xfs_dfsbno_t *sp,
-		   xfs_dfilblks_t *cp, int *fp)
-{
-	xfs_dfiloff_t o;
-	xfs_dfsbno_t s;
-	xfs_dfilblks_t c;
-	int flag;
-
-	flag = (int)((INT_GET(rp->l0, ARCH_CONVERT)) >> (64 - 1 ));
-	o = ((xfs_fileoff_t)INT_GET(rp->l0, ARCH_CONVERT) &
-			   (((__uint64_t)1 << ( 64 - 1	)) - 1) ) >> 9;
-	s = (((xfs_fsblock_t)INT_GET(rp->l0, ARCH_CONVERT) & (((__uint64_t)1 << ( 9 )) - 1) ) << 43) |
-			   (((xfs_fsblock_t)INT_GET(rp->l1, ARCH_CONVERT)) >> 21);
-	c = (xfs_filblks_t)(INT_GET(rp->l1, ARCH_CONVERT) & (((__uint64_t)1 << ( 21 )) - 1) );
-	*op = o;
-	*sp = s;
-	*cp = c;
-	*fp = flag;
-}
-
-
-/*
  * Print an xfs_da_state_path structure.
  */
 static void
@@ -2890,11 +3027,8 @@ static void
 xfs_xexlist_fork(xfs_inode_t *ip, int whichfork)
 {
 	int nextents, i;
-	xfs_dfiloff_t o;
-	xfs_dfsbno_t s;
-	xfs_dfilblks_t c;
-	int flag;
 	xfs_ifork_t *ifp;
+	xfs_bmbt_irec_t irec;
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (ifp->if_flags & XFS_IFEXTENTS) {
@@ -2902,12 +3036,12 @@ xfs_xexlist_fork(xfs_inode_t *ip, int whichfork)
 		kdb_printf("inode 0x%p %cf extents 0x%p nextents 0x%x\n",
 			ip, "da"[whichfork], ifp->if_u1.if_extents, nextents);
 		for (i = 0; i < nextents; i++) {
-			xfs_convert_extent(
-				(xfs_bmbt_rec_64_t *)&ifp->if_u1.if_extents[i],
-				&o, &s, &c, &flag);
+			xfs_bmbt_get_all(&ifp->if_u1.if_extents[i], &irec);
 			kdb_printf(
 		"%d: startoff %Ld startblock %s blockcount %Ld flag %d\n",
-				i, o, xfs_fmtfsblock(s, ip->i_mount), c, flag);
+			i, irec.br_startoff,
+			xfs_fmtfsblock(irec.br_startblock, ip->i_mount),
+			irec.br_blockcount, irec.br_state);
 		}
 	}
 }
@@ -3030,7 +3164,33 @@ xfsidbg_xalloc(xfs_alloc_arg_t *args)
 		args->wasfromfl, args->isfl, args->userdata);
 }
 
+#ifdef DEBUG
+/*
+ * Print out all the entries in the alloc trace buf corresponding
+ * to the given mount point.
+ */
+static void
+xfsidbg_xalmtrace(xfs_mount_t *mp)
+{
+	ktrace_entry_t	*ktep;
+	ktrace_snap_t	kts;
+	extern ktrace_t	*xfs_alloc_trace_buf;
+
+	if (xfs_alloc_trace_buf == NULL) {
+		kdb_printf("The xfs alloc trace buffer is not initialized\n");
+		return;
+	}
 
+	ktep = ktrace_first(xfs_alloc_trace_buf, &kts);
+	while (ktep != NULL) {
+		if ((__psint_t)ktep->val[0] && (xfs_mount_t *)ktep->val[3] == mp) {
+			(void)xfs_alloc_trace_entry(ktep);
+			kdb_printf("\n");
+		}
+		ktep = ktrace_next(xfs_alloc_trace_buf, &kts);
+	}
+}
+#endif /* DEBUG */
 
 /*
  * Print an attr_list() context structure.
@@ -3199,14 +3359,12 @@ xfsidbg_xbmalla(xfs_bmalloca_t *a)
 static void
 xfsidbg_xbrec(xfs_bmbt_rec_64_t *r)
 {
-	xfs_dfiloff_t o;
-	xfs_dfsbno_t s;
-	xfs_dfilblks_t c;
-	int flag;
+	xfs_bmbt_irec_t	irec;
 
-	xfs_convert_extent(r, &o, &s, &c, &flag);
+	xfs_bmbt_get_all((xfs_bmbt_rec_t *)r, &irec);
 	kdb_printf("startoff %Ld startblock %Lx blockcount %Ld flag %d\n",
-		o, s, c, flag);
+		irec.br_startoff, (__uint64_t)irec.br_startblock, 
+		irec.br_blockcount, irec.br_state);
 }
 
 /*
@@ -4412,8 +4570,7 @@ xfsidbg_xmount(xfs_mount_t *mp)
 		"OSYNC",	/* 0x2000 */
 		"NOUUID",	/* 0x4000 */
 		"32BIT",	/* 0x8000 */
-		"IRIXSGID",	/* 0x10000 */
-		"NOLOGFLUSH",	/* 0x20000 */
+		"NOLOGFLUSH",	/* 0x10000 */
 		0
 	};
 
@@ -4438,9 +4595,9 @@ xfsidbg_xmount(xfs_mount_t *mp)
 		mp->m_ail_gen, &mp->m_sb);
 	kdb_printf("sb_lock 0x%p sb_bp 0x%p dev 0x%x logdev 0x%x rtdev 0x%x\n",
 		&mp->m_sb_lock, mp->m_sb_bp,
-		mp->m_ddev_targp->pbr_dev,
-		mp->m_logdev_targp->pbr_dev,
-		mp->m_rtdev_targp->pbr_dev);
+		mp->m_ddev_targp ? mp->m_ddev_targp->pbr_dev : 0,
+		mp->m_logdev_targp ? mp->m_logdev_targp->pbr_dev : 0,
+		mp->m_rtdev_targp ? mp->m_rtdev_targp->pbr_dev : 0);
 	kdb_printf("bsize %d agfrotor %d agirotor %d ihash 0x%p ihsize %d\n",
 		mp->m_bsize, mp->m_agfrotor, mp->m_agirotor,
 		mp->m_ihash, mp->m_ihsize);
@@ -4734,13 +4891,17 @@ xfsidbg_xperag(xfs_mount_t *mp)
 		if (pag->pagi_init)
 			kdb_printf("	i_freecount %d i_inodeok %d\n",
 				pag->pagi_freecount, pag->pagi_inodeok);
-
-		for (busy = 0; busy < XFS_PAGB_NUM_SLOTS; busy++) {
-			kdb_printf("	 %04d: start %d length %d tp 0x%p\n",
-				busy,
-				pag->pagb_list[busy].busy_start,
-				pag->pagb_list[busy].busy_length,
-				pag->pagb_list[busy].busy_tp);
+		if (pag->pagf_init) {
+			for (busy = 0; busy < XFS_PAGB_NUM_SLOTS; busy++) {
+				if (pag->pagb_list[busy].busy_length != 0) {
+					kdb_printf(
+		"	 %04d: start %d length %d tp 0x%p\n",
+					    busy,
+					    pag->pagb_list[busy].busy_start,
+					    pag->pagb_list[busy].busy_length,
+					    pag->pagb_list[busy].busy_tp);
+				}
+			}
 		}
 	}
 }
@@ -5138,8 +5299,10 @@ xfsidbg_xtp(xfs_trans_t *tp)
 		tp->t_log_res, tp->t_blk_res, tp->t_blk_res_used);
 	kdb_printf("rt res %d rt res used %d\n", tp->t_rtx_res,
 		tp->t_rtx_res_used);
-	kdb_printf("ticket 0x%lx lsn %s\n",
-		(unsigned long) tp->t_ticket, xfs_fmtlsn(&tp->t_lsn));
+	kdb_printf("ticket 0x%lx lsn %s commit_lsn %s\n",
+		(unsigned long) tp->t_ticket,
+		xfs_fmtlsn(&tp->t_lsn),
+		xfs_fmtlsn(&tp->t_commit_lsn));
 	kdb_printf("callback 0x%p callarg 0x%p\n",
 		tp->t_callback, tp->t_callarg);
 	kdb_printf("icount delta %ld ifree delta %ld\n",
author	David S. Miller <davem@nuts.ninka.net>	2002-10-15 07:41:35 -0700
committer	David S. Miller <davem@nuts.ninka.net>	2002-10-15 07:41:35 -0700
commit	8fbfe7cd5594010a23cb4e81786d1fb8015ffdee (patch)
tree	b5be190f22984395209823ec3cac1c76fc93f67f /fs
parent	e22f7f5fd43205bfd20ea3a7bb4e689cb3f3d278 (diff)
parent	5a7728c6d3eb83df9d120944cca4cf476dd326a1 (diff)